/* Do-file to generate the appendix tables and figures for "Gender Differences in Job Search and the Earnings Gap: Evidence from the Field and the Lab"
(QJE, 2023) by Cortes, Pan, Pilossoph, Reuben, and Zafar 
Last updated: Jan 2023 */

* Start from an empty Stata session before building the appendix output.
clear all
* Make double the default storage type for newly generated variables.
* NOTE(review): the "permanently" option also keeps this default for future Stata sessions on this machine.
set type double, permanently

* Working directory holding the replication .dta files (machine-specific absolute path; edit before running elsewhere).
cd "/Users/jessicapan/JPan Dropbox/Jessica Pan/current_projects/BU Job Search Analysis/Submission/qje/replication_materials"

* Output folder for exported figures and tables. Later code writes to "${figures}<file>", so the trailing slash matters.
global figures "/Users/jessicapan/JPan Dropbox/Jessica Pan/current_projects/BU Job Search Analysis/Submission/qje/replication_materials/figures/"

***************************************************************************
* Figure A1: Mean Accepted Earnings by Months Since Graduation and Gender
***************************************************************************

* tables_figures_june2022/earn_bymonth.pdf

* Figure A1: mean accepted earnings by month of acceptance (relative to graduation), by gender.
* Sample: accepted offers with an acceptance month within 15 months of graduation.
use BU_grad_analysis_sample_aug2021.dta, clear //reset dataset
keep if accepted==1
keep if abs(accept_mo)<=15
	
gen male = 1-female
replace first_industry=13 if accepted==1 & missing(first_industry)

* Maternity-or-paternity benefit flag, defined only when BOTH components are observed.
* Fix: the second guard previously repeated first_benefit_maternity, so paternity was never checked.
gen first_benefit_matorpat= (first_benefit_maternity==1 | first_benefit_paternity==1) if !missing(first_benefit_maternity) & !missing(first_benefit_paternity)
	
* Flag missing expected earnings growth, then zero-fill it.
foreach var of varlist exp_earn_growth_1yr{
	gen `var'_miss=missing(`var')
	replace `var'=0 if missing(`var')
	}

* Gender-specific copies of earnings so that collapse yields one series per gender.
gen first_total_nt_f = first_total_nt if female==1
gen first_total_nt_m = first_total_nt if female==0

* Pool the tails: acceptance months at or beyond -9 / +9 go into the end bins.
replace accept_mo = -9 if accept_mo<=-9
replace accept_mo = 9 if accept_mo>=9

gen accept_late_6mo = accept_mo>6
gen accept_late_9mo = accept_mo>=9

* Share of late acceptances, overall and by gender (log output pasted below for reference).
sum accept_late_6mo accept_late_9mo
sum accept_late_6mo accept_late_9mo if female==1
sum accept_late_6mo accept_late_9mo if female==0

/*
. sum accept_late_6mo accept_late_9mo

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
accept_l~6mo |      1,358     .107511     .309876          0          1
accept_l~9mo |      1,358    .0618557     .240982          0          1

. sum accept_late_6mo accept_late_9mo if female==1

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
accept_l~6mo |        736    .0788043    .2696164          0          1
accept_l~9mo |        736    .0502717    .2186537          0          1

. sum accept_late_6mo accept_late_9mo if female==0

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
accept_l~6mo |        622    .1414791    .3487956          0          1
accept_l~9mo |        622    .0755627    .2645099          0          1
*/

* One row per acceptance month: mean earnings and cell counts by gender (counts become marker labels).
collapse first_total_nt_f first_total_nt_m (count) N_f = first_total_nt_f N_m = first_total_nt_m, by(accept_mo) 

# delimit ;
graph twoway connect first_total_nt_m accept_mo, msymbol(none) mla(N_m) mlabsize(vsmall) || 
connect first_total_nt_f accept_mo, lpattern(dash) msymbol(none) mla(N_f) mlabsize(vsmall)
xtitle("Months Since Graduation", size(small)) ylabel(,labsize(small)) xlabel(-9(3)9, labsize(small))
graphregion(color(white)) legend(label(1 "Male") label(2 "Female") size(small)) ytitle("Accepted Offer ($)", size(small)) ;
graph export "${figures}figureA1.pdf", replace;
# delimit cr
* Delimiter reset added: without it the star-comment banner that follows is read in
* semicolon mode and runs on until the next semicolon, swallowing the code after it.

******************************************************
* Figure A2: Importance of Having a Job by Graduation
******************************************************

* tables_figures_june2022/afig1_imp_job_bygrad.pdf

	* Figure A2: distribution of the 5-point "importance of having a job before graduation" item by gender.
	use BU_merge_master_figA2.dta, clear		
	* NOTE(review): encode numbers categories in alphabetical order of the string values;
	* presumably the strings sort in scale order (1..5) -- confirm against the raw data.
	encode Ona5pointscalehowimportan_1, gen(important_job_b4_grad)
	gen gender=gender_1

	* Plot data live in the first five observations: bar positions (offset +/-0.15 around
	* each category) and the within-gender share choosing that category.
	* Throughout this section gender==2 is treated as male and gender==1 as female.
	gen fem_cat=.
 	gen male_cat=.
	gen male_percent=.
	gen fem_percent=.
	count if gender==2 & !missing(important_job_b4_grad)
	local num_male=r(N)
	count if gender==1 & !missing(important_job_b4_grad)
	local num_fem=r(N)
	forval i=1/5{
		replace fem_cat=`i'+0.15 in `i'
		replace male_cat=`i'-0.15 in `i'
		count if important_job_b4_grad==`i' & gender==2
		replace male_percent=r(N)/`num_male' in `i'
		count if important_job_b4_grad==`i' & gender==1
		replace fem_percent=r(N)/`num_fem' in `i'
	}
	
	* Mean and N per gender for the figure note.
	forval g=1/2{
		sum important_job_b4_grad if gender==`g'
		local mean_g`g': display %3.2f r(mean)
		local N_g`g': display %4.0f r(N)
	}

	* Test of equal means across genders. It does not depend on the loop index,
	* so it runs once here rather than once per loop iteration.
	ttest important_job_b4_grad, by(gender)
	local p: display %4.3f r(p)
	
	twoway (bar male_percent male_cat, barw(0.3) lcolor(navy) fcolor(navy%30)) ///
		(bar fem_percent fem_cat, barw(0.3) lcolor(maroon) fcolor(maroon)) ///
		, ///
		graphregion(color(white)) ///
		legend(order(1 "Male" 2 "Female")) ///
		name(figure35, replace) ///
		xtitle("How important is it to have a job before graduation?", size(medsmall)) ///
		note("Mean (men): `mean_g2'" "N (men): `N_g2'" "Mean (women): `mean_g1'" "N (women): `N_g1'" "p-val (mean diff)=`p'", ring(0) pos(11) size(vsmall))

graph export "${figures}figureA2.pdf", replace

******************************************************************************
* Figure A3: Gender Difference in Beliefs Bias – Within Individual Comparison
******************************************************************************

* tables_figures_june2022/fig5_beliefs_bias_indiv.pdf

	* Figure A3: within-individual comparison of expected vs. realized earnings, by gender.
	* Sample: post-graduation survey respondents flagged accepted_after_base==1, excluding jobsearch_3==2.
	use postg_analysis.dta, clear
	keep if accepted_after_base==1
	drop if jobsearch_3==2				//added this in aug 2021
	
	* Single respondent ID: first non-missing of the three wave-specific IDs.
	gen RespondentID=RespondentID_1
	replace RespondentID=RespondentID_2 if missing(RespondentID)
	replace RespondentID=RespondentID_3 if missing(RespondentID)
	
	* expect_real0 = expected total pay, expect_real1 = actual total pay.
	gen expect_real0=expected_totalpay
	gen expect_real1=actual_totalpay
	
	* Beliefs gap: positive values mean the expectation exceeded the realization.
	gen diff = expect_real0 - expect_real1
	
	* Mean gap and N by gender, formatted for the graph annotations below.
	sum diff if female==1
	local mean_f: display %4.0f r(mean) 
	local N_f: display %4.0f r(N)
	sum diff if female==0
	local mean_m : display %4.0f r(mean)
	local N_m: display %4.0f r(N) 
	
	* Kernel densities of the gap by gender (the legend labels gender==2 as Male and gender==1 as Female).
	* NOTE(review): this graph is not saved or exported in this section; figureA3.pdf is the combined graph built further down.
	# delimit ;
	twoway (kdensity diff if gender==2)(kdensity diff if gender==1, lpattern(dash)), legend(label(1 "Male") label(2 "Female"))
	xline(`mean_m', lcolor(navy)) xline(`mean_f', lcolor(maroon) lpattern(dash)) graphregion(color(white)) 
	xtitle("Earnings Expectations - Realized") ytitle("Density")
	text(0.00002 100000 "Mean (Male): $`mean_m', N:`N_m'", size(small))
	text(0.000019 103500 "Mean (Female): $`mean_f', N:`N_f'", size(small));
	
	# delimit cr
	
	** plot both distributions separately
	
	* Female panel (gender==1): KS p-value, N and mean gap for the note, then expectation vs. realization densities.
	* NOTE(review): the ksmirnov and count here add the filter diff~=., while the male block below does not -- confirm the asymmetry is intended.
	ksmirnov expect_real0=expect_real1 if gender==1 & diff~=.
	local ks_p: display %4.3f r(p)
	
	count if !missing(expect_real1) & gender==1 & diff~=.
	local n_r1=r(N)
	
	sum diff if gender==1
    local mean_diff: display %4.0f r(mean)
	
	twoway (kdensity expect_real1 if gender==1, bw(6000)) ///
	(kdensity expect_real0 if gender==1, bw(6000) lp(dash) ), xtitle("Earnings") ///
	ytitle("Density" " ") graphregion(color(white)) title("Female") name(comp_f, replace) ///
	legend(order(1 "Realization" 2 "Expectations") size(medsmall)) xlabel(0(50000)175000) ylabel(0(0.00001)0.00003) ///
	note("KS p-val: `ks_p'" "N: `n_r1'" "Mean Diff. (Expect-Real): $`mean_diff'", ring(0) pos(2) size(vsmall)) 
	
	* Male panel (gender==2): same statistics; the locals ks_p, n_r1 and mean_diff are overwritten.
	ksmirnov expect_real0=expect_real1 if gender==2
	local ks_p: display %4.3f r(p)
	
	count if !missing(expect_real1) & gender==2 
	local n_r1=r(N)
	
	sum diff if gender==2
    local mean_diff: display %4.0f r(mean)
	
	twoway (kdensity expect_real1 if gender==2, bw(6000)) ///
	(kdensity expect_real0 if gender==2, bw(6000) lp(dash) ), xtitle("Earnings") ///
	ytitle("Density" " ") graphregion(color(white)) title("Male") name(comp_m, replace) ///
	legend(order(1 "Realization" 2 "Expectations") size(medsmall)) xlabel(0(50000)175000) ylabel(0(0.00001)0.00003) ///
	note("KS p-val: `ks_p'" "N: `n_r1'" "Mean Diff. (Expect-Real): $`mean_diff'", ring(0) pos(2) size(vsmall)) 
	
	* Combine the two named panels under one shared legend (grc1leg is a user-written command) and export.
	grc1leg comp_f comp_m, graphregion(color(white)) 
	graph export "${figures}figureA3.pdf", replace

*********************************************************************
* Figure A4: CDF of Beliefs Bias by Gender – Cross-Cohort Comparison
*********************************************************************

* afig4_beliefs_bias_CDF.pdf

	* Figure A4: CDFs of (0) population beliefs, (1) own expectations and (2) realized earnings,
	* compared across cohorts, one panel per gender.
	use BU_grad_analysis_sample_aug2021.dta, clear 
	keep if indiv_tag==1
	
	* Belief about one's own gender group: male belief for gender==2, female belief for gender==1.
	gen quest_belief_self=quest_belief_male if gender==2
	replace quest_belief_self=quest_belief_female if gender==1
	* Series 1: own expected pay (2018/2019 cohorts). Series 2: realized pay (2017/2018 cohorts).
	* Series 0: population belief (2018/2019 cohorts with a non-missing own expectation).
	gen expect_real_earn1=expected_totalpay if (cohort==2019 | cohort==2018)
	gen expect_real_earn2=first_total_nt if (cohort==2017 | cohort==2018)
	gen expect_real_earn0=quest_belief_self if (cohort==2019 | cohort==2018) & !missing(expected_totalpay)
	
	keep expect_real_earn* RespondentID gender cohort
	
	* Stack the three series; "real" records the series index (0/1/2).
	* The stub expect_quest was dropped from the reshape: after the keep above no
	* expect_quest* variable can exist, so the stub could never match anything.
	reshape long expect_real_earn, i(RespondentID) j(real)
	keep if !missing(expect_real_earn)
	
	* One CDF panel per gender; the two panels differ only in the sample filter, title and graph name.
	* cdfplot is a user-written command.
	foreach g in 1 2 {
		if `g'==1 {
			local panel_title "Females"
			local panel_name comp_f
		}
		else {
			local panel_title "Males"
			local panel_name comp_m
		}
		cdfplot expect_real_earn if gender==`g', ///
		by(real) ///
		xtitle("Earnings") ///
		ytitle("Cumulative Density" " ") ///
		graphregion(color(white)) ///
		title("`panel_title'") ///
		name(`panel_name', replace) ///
		legend(order(3 "2017-2018 Realization" 2 "2018-2019 Subjective Expectation" 1 "2018-2019 Population Beliefs") col(1) size(medsmall)) ///
		xlabel(0(50000)175000) ///
		nodraw ///
		opt1(lcolor(gs12%65 maroon navy) lwidth(thick medium medium) lpattern(solid shortdash longdash)) 
	}
	
	* Combine both panels under one shared legend (grc1leg is a user-written command) and export.
	grc1leg comp_f comp_m, graphregion(color(white)) 
	graph export "${figures}figureA4.pdf", replace

*********************************************************************************
* Figure A5: Relationship Between Ex-Ante Earnings Expectations and Realizations 
*********************************************************************************

* tables_figures_june2022/realize_expect.pdf

* Figure A5: binned scatter of realized first-year earnings against baseline expectations.
* Sample: respondents with missing actual_totalpay_1, excluding jobsearch_3==2.
use postg_analysis.dta, clear
drop if !missing(actual_totalpay_1)
drop if jobsearch_3==2				//added this in aug 2021

gen base_A=expected_totalpay_1
gen mid_A=expected_totalpay_2
*replace mid=actual_totalpay_2 if missing(mid)
gen realized_A=actual_totalpay

* Rescale to thousands for plotting.
gen base_A_1000 = base_A/1000
gen mid_A_1000 = mid_A/1000
gen realized_A_1000 = realized_A/1000

		* Slope, N and significance stars for the annotation.
		reg realized_A_1000 base_A_1000 if accepted==1, robust
			local N=e(N)
			local b: display %4.3fc _b[base_A_1000]
			mat table=r(table)
			* p holds the text "table[4,1]" (the p-value row of r(table)); it is evaluated inside each if below.
			local p table[4,1]
			local stars "   "
			if `p'<0.1 {
				local stars "*  "
			}
			if `p'<0.05 {
				local stars "** "
			}
			if `p'<0.01 {
				local stars "***"
			}
			
# delimit ; 
binscatter realized_A_1000 base_A_1000 if accepted==1, n(20)
ytitle("Actual First Year Earnings (1,000s)", size(medsmall)) ylabel(20(20)100)
xtitle("Expected First Year Earnings (1,000s) (Baseline)", size(medsmall)) xlabel(20(20)160) 
graphregion(color(white)) legend(label(1 Male ) label(2 Female) size(medsmall))
text(95 120 "Coef: `b'`stars'", place(e) size(small)) 
text(92 125 "N: `N'", place(e) size(small));
graph export "${figures}figureA5.pdf", replace;
# delimit cr
* Delimiter reset added: without it the star-comment banner that follows is read in
* semicolon mode and runs on until the next semicolon, swallowing the code after it.

*********************************************************************************
* Figure A6: Timing of Job Acceptance, Risk Preferences, and Overoptimism
*********************************************************************************

* fig6_accept_timing_risk.pdf
* accept_timing_ocW.pdf

** PANEL A **

	* Panel A: timing of job acceptance against risk tolerance (risk2), accepted offers only.
	* NOTE(review): the section banner calls this Figure A6 but the export below writes figureA7a.pdf -- confirm the intended numbering.
	use BU_grad_analysis_sample_aug2021.dta, clear 

	* Left graph: month of acceptance on risk2.
	* N counts accepted observations with non-missing accept_mo, female and risk2
	* (the regression itself does not condition on female).
	sum accept_mo if accepted==1 & female~=. & risk2~=.
	local N=r(N)
		reg accept_mo risk2 if accepted==1, robust
			local b: display %4.3fc _b[risk2]
			mat table=r(table)
			* p holds the text "table[4,1]" (the p-value row of r(table)); it is evaluated inside each if below.
			local p table[4,1]
			local stars "   "
			if `p'<0.1 {
				local stars "*  "
			}
			if `p'<0.05 {
				local stars "** "
			}
			if `p'<0.01 {
				local stars "***"
			}

	* The former yscale(range(ymin ymax)) option referenced two locals that are never
	* defined, so it expanded to an empty range; it is removed and ylabel() sets the axis.
	binscatter accept_mo risk2 if accepted==1, legend(off) reportreg ///
	xtitle("Willingness to Take Risk (Average)", size(medsmall)) ytitle("Mean Month of Accepting An Offer" " ", size(medsmall)) ///
	name(figure7, replace) xlabel(1(1)6) ylabel(-2(0.5)0.5) graphregion(color(white)) ///
	text(0.44 5 "Coef: `b'`stars'", place(e) size(small)) ///
	text(0.36 5.18  "N: `N'", place(e) size(small))
	graph save accept_timing_risk1, replace
		
		* Right graph: indicator for accepting 6+ months after graduation on risk2.
		sum accept_6mafter_grad if accepted==1 & female~=. & risk2~=.
		local N=r(N)
		reg accept_6mafter_grad risk2 if accepted==1, robust
			local b: display %4.3fc _b[risk2]
			mat table=r(table)
			local p table[4,1]
			local stars "   "
			if `p'<0.1 {
				local stars "*  "
			}
			if `p'<0.05 {
				local stars "** "
			}
			if `p'<0.01 {
				local stars "***"
			}
			
	binscatter accept_6mafter_grad risk2 if accepted==1, legend(off) reportreg ///
	xtitle("Willingness to Take Risk (Average)", size(medsmall)) ytitle("Share Accepting Job {&ge} 6 Months" "After Graduation", size(medsmall)) ///
	name(figure7, replace) xlabel(1(1)6) ylabel(0.05(0.05)0.25) graphregion(color(white)) ///
	text(0.243 5 "Coef: `b'`stars'", place(e) size(small)) ///
	text(0.237 5.18  "N: `N'", place(e) size(small))
	graph save accept_timing_risk2, replace
	
* Combine the two saved graphs, export, and remove the intermediate .gph files.
graph combine accept_timing_risk1.gph accept_timing_risk2.gph, graphregion(color(white))
graph export "${figures}figureA7a.pdf", replace
erase accept_timing_risk1.gph
erase accept_timing_risk2.gph

** PANEL B **

	* Panel B: timing of job acceptance against overoptimism, accepted offers only.
	use BU_grad_analysis_sample_aug2021.dta, clear 
	* Overoptimism = percent by which expected pay exceeds realized pay; ocW is the winsorized version.
	* winsor is a user-written command.
	gen oc=(expected_totalpay-first_total_nt)/first_total_nt*100 if !missing(expected_totalpay) & !missing(first_total_nt)
	winsor oc, gen(ocW) p(0.025)		//winsorizing top and bottom 2.5% 
	
	* Left graph: month of acceptance on ocW. One star is appended per significance threshold passed.
	reg accept_mo ocW if accepted==1, robust
	local coef: display %4.3f _b[ocW]
	* N now comes from the estimation sample of the regression being plotted.
	* The previous count filtered on oc1, a variable this script never creates.
	local N=e(N)
	test ocW
	if r(p)<0.1{
		local coef "`coef'*"
	} 
	if r(p)<0.05{
		local coef "`coef'*"
	} 
	if r(p)<0.01{
		local coef "`coef'*"
	} 
	
	binscatter accept_mo ocW if accepted==1, ///
		graphregion(color(white)) ///
		xtitle("Overoptimism: [(Expect - Realized)/Realized]*100%") ///
		ytitle("Mean Month of Accepting An Offer") ///
		text(2.8 100 "Coef: `coef'", place(e) size(small)) ///
		text(2.55 107 "N: `N' ", place(e) size(small)) ylabel(-3(1)3)
	
	graph save accept_timing_ocW1, replace

	* Right graph: indicator for accepting 6+ months after graduation on ocW.
	reg accept_6mafter_grad ocW if accepted==1, robust
	local coef: display %4.3f _b[ocW]
	local N=e(N)
	test ocW
	if r(p)<0.1{
		local coef "`coef'*"
	} 
	if r(p)<0.05{
		local coef "`coef'*"
	} 
	if r(p)<0.01{
		local coef "`coef'*"
	} 
	
	binscatter accept_6mafter_grad ocW if accepted==1, ///
		graphregion(color(white)) ///
		xtitle("Overoptimism: [(Expect - Realized)/Realized]*100%") ///
		ytitle("Share Accepting Job {&ge} 6 Months" "After Graduation") ///
		text(0.33 100 "Coef: `coef'", place(e) size(small)) ///
		text(0.315 107 "N: `N' ", place(e) size(small)) ylabel(0(0.05)0.35)
	
	graph save accept_timing_ocW2, replace

* Combine the two saved graphs, export, and remove the intermediate .gph files.
graph combine accept_timing_ocW1.gph accept_timing_ocW2.gph, graphregion(color(white))
graph export "${figures}figureA7b.pdf", replace
erase accept_timing_ocW1.gph
erase accept_timing_ocW2.gph

****************************************************************************************
* Figure A8: Acceptance Rates and Cumulative Mean Accepted Wage Across Rounds by Gender
****************************************************************************************

* roundaccept_raw.pdf
* acceptedwage_raw.pdf
* finalwage_raw.pdf

******************************************************************
* Figures A10 and A11: Correlations with Patience/Procrastination
******************************************************************

* patience_figure_2.pdf
* patience_figure.pdf

** PANEL A **

* Reservation-wage graphs: ex-ante reservation earnings against procrastination and patience.
use temp_res_wages, clear

* --- Procrastination index ---
* z-score each of the three items on the loaded sample, add the z-scores
* (only where all three items are observed), then standardize the sum.
foreach item of varlist oftenlatetasks_3 regrettaskssooner_3 workbestlastmin_3 {
	quietly summarize `item'
	generate `item'_sd = (`item'-r(mean))/r(sd)
}

generate procrastindex_3_v2 = oftenlatetasks_3_sd + regrettaskssooner_3_sd + workbestlastmin_3_sd ///
	if !missing(oftenlatetasks_3) & !missing(regrettaskssooner_3) & !missing(workbestlastmin_3)
egen procrastindex_3_sd = std(procrastindex_3_v2)
summarize procrastindex_3_v2

* --- Patience ---
* Merge the two lowest categories of patience_3, then shift the scale down by one.
recode patience_3 (1/2=2), gen(patience)
replace patience = patience-1
tabulate patience

* Reservation earnings on the procrastination index: slope, N and stars for the annotation.
regress reservation_wage_1W_000 procrastindex_3_sd if weird~=1 & reservation_wage_1>=20000, robust
local N = e(N)
local b : display %4.3fc _b[procrastindex_3_sd ]
matrix table = r(table)
* p holds the text "table[4,1]" (p-value row of r(table)) and is evaluated inside each condition.
local p table[4,1]
if `p'<0.01 {
	local stars "***"
}
else if `p'<0.05 {
	local stars "** "
}
else if `p'<0.1 {
	local stars "*  "
}
else {
	local stars "   "
}

binscatter reservation_wage_1W_000 procrastindex_3_sd if weird~=1 & reservation_wage_1>=20000, ///
	xtitle("Procrastination Index (Standardized)") ytitle("Ex-Ante Reservation Earnings" "(1,000$)") ///
	graphregion(color(white)) ylabel(30(10)80) ///
	text(75 1.05 "Coef: `b'`stars'", place(e) size(small)) ///
	text(73 1.2  "N: `N'", place(e) size(small))
graph save reservation_wage_procrastinate, replace

* Reservation earnings on patience: same statistics, same sample restriction.
regress reservation_wage_1W_000 patience if weird~=1 & reservation_wage_1>=20000, robust
local N = e(N)
local b : display %4.3fc _b[patience]
matrix table = r(table)
local p table[4,1]
if `p'<0.01 {
	local stars "***"
}
else if `p'<0.05 {
	local stars "** "
}
else if `p'<0.1 {
	local stars "*  "
}
else {
	local stars "   "
}

binscatter reservation_wage_1W_000 patience if weird~=1 & reservation_wage_1>=20000, ///
	xtitle("Time Preferences: Patience (1-6)") ytitle("Ex-Ante Reservation Earnings" "(1,000$)", size(medsmall)) ///
	xlabel() ylabel(30(10)80) graphregion(color(white)) ///
	text(75 5 "Coef: `b'`stars'", place(e) size(small)) ///
	text(73 5.18  "N: `N'", place(e) size(small))
graph save reservation_wage_patience, replace

** Search Timing **

* Search-timing graphs: share starting search before graduation against procrastination
* and patience, then the combined Panel A exports (figureA11a / figureA10a).
use BU_grad_analysis_sample_aug2021.dta, clear 
keep if accepted==1

* Overoptimism = percent by which expected pay exceeds realized pay; ocW is the winsorized version.
gen oc=(expected_totalpay-first_total_nt)/first_total_nt*100 if !missing(expected_totalpay) & !missing(first_total_nt)
winsor oc, gen(ocW) p(0.025)		//winsorizing top and bottom 2.5% 

sum startsearch_mo

* Flags respondents whose search start month is later than their active-search start month.
* NOTE(review): Stata treats missing as larger than any number and the guard uses "|", so a
* missing startsearch_mo paired with a non-missing activestart_mo is also flagged -- confirm
* whether "&" was intended. The flag is not used again in this section.
gen weird = 1 if startsearch_mo > activestart_mo & (startsearch_mo~=. | activestart_mo~=.)

* Indicators for starting search before graduation / 3+ / 6+ months before; missing when the start month is missing.
gen startsearch_beforegrad = startsearch_mo<0
replace startsearch_beforegrad=. if startsearch_mo==.

gen startsearch_3mbeforegrad = startsearch_mo<=-3
replace startsearch_3mbeforegrad=. if startsearch_mo==.

gen startsearch_6mbeforegrad = startsearch_mo<=-6
replace startsearch_6mbeforegrad=. if startsearch_mo==.

* Exploratory regressions (log output only).
reg startsearch_mo risk2 if accepted==1, robust
reg startsearch_mo ocW if accepted==1, robust

reg startsearch_beforegrad risk2 if accepted==1, robust
reg startsearch_beforegrad ocW if accepted==1, robust

*re-standardizing procrastination index*
* Item means and SDs are taken over indiv_tag==1 rows; the z-score is assigned to every row.
foreach var of varlist oftenlatetasks_3 regrettaskssooner_3 workbestlastmin_3{
	quietly sum `var' if indiv_tag==1
	gen `var'_sd= (`var'-r(mean))/r(sd)
}

gen procrastindex_3_v2= oftenlatetasks_3_sd + regrettaskssooner_3_sd + workbestlastmin_3_sd if !missing(oftenlatetasks_3) & !missing(regrettaskssooner_3) & !missing(workbestlastmin_3)
egen procrastindex_3_sd = std(procrastindex_3_v2)
sum procrastindex_3_v2

* Share starting search before graduation and procrastination *
* (stars are computed below but the annotation prints the coefficient without them)

reg startsearch_beforegrad procrastindex_3_sd, robust 
			local b: display %4.3fc _b[procrastindex_3_sd]
			mat table=r(table)
			local p table[4,1]
			local stars "   "
			if `p'<0.1 {
				local stars "*  "
			}
			if `p'<0.05 {
				local stars "** "
			}
			if `p'<0.01 {
				local stars "***"
			}
local N = e(N)

# delimit ;
binscatter startsearch_beforegrad procrastindex_3_sd, n(20) ylabel(0.4(0.1)1)
xtitle("Procrastination Index (Standardized)") ytitle("Share Starting Search Before Graduation")
text(0.92 1.05 "Coef: `b' ", place(e) size(small)) text(0.9 1.2 "N: `N' ", place(e) size(small)) graphregion(color(white));
graph save search_timing_procrastinate, replace;

# delimit cr

***PATIENCE***

* Merge the two lowest categories of patience_3, then shift the scale down by one.
recode patience_3 (1/2=2), gen(patience)
replace patience = patience-1
tab patience

reg startsearch_beforegrad patience, robust 
			local b: display %4.3fc _b[patience]
			mat table=r(table)
			local p table[4,1]
			local stars "   "
			if `p'<0.1 {
				local stars "*  "
			}
			if `p'<0.05 {
				local stars "** "
			}
			if `p'<0.01 {
				local stars "***"
			}
local N = e(N)

# delimit ;
binscatter startsearch_beforegrad patience, n(20) ylabel(0.4(0.1)1)
xtitle("Time Preferences: Patience (1-6)") ytitle("Share Starting Search Before Graduation")
text(0.92 5 "Coef: `b' ", place(e) size(small)) text(0.9 5.18 "N: `N' ", place(e) size(small)) graphregion(color(white));
graph save search_timing_patience, replace;
# delimit cr

* Combining figures *
* The delimiter reset now comes before the comment above. In semicolon mode a star
* comment runs to the next semicolon and would swallow the commands that follow.
* NOTE(review): the reservation_wage_*.gph inputs are produced by the preceding
* reservation-wage section; this section only creates the search_timing_*.gph files.

graph combine reservation_wage_procrastinate.gph search_timing_procrastinate.gph, graphregion(color(white))
graph export "${figures}figureA11a.pdf", replace
erase reservation_wage_procrastinate.gph
erase search_timing_procrastinate.gph

graph combine reservation_wage_patience.gph search_timing_patience.gph, graphregion(color(white))
graph export "${figures}figureA10a.pdf", replace
erase reservation_wage_patience.gph
erase search_timing_patience.gph

** PANEL B **

* Panel B: acceptance timing and first-year earnings against procrastination (figureA11b)
* and patience (figureA10b), accepted offers only.
use BU_grad_analysis_sample_aug2021.dta, clear 
keep if accepted==1

* Overoptimism measure and its winsorized version (not used by the regressions in this panel).
gen oc=(expected_totalpay-first_total_nt)/first_total_nt*100 if !missing(expected_totalpay) & !missing(first_total_nt)
winsor oc, gen(ocW) p(0.025)		//winsorizing top and bottom 2.5% 

***PROCRASTINATION***

*re-standardizing procrastination index*
* Item means and SDs are taken over indiv_tag==1 rows; the z-score is assigned to every row.
foreach var of varlist oftenlatetasks_3 regrettaskssooner_3 workbestlastmin_3{
	quietly sum `var' if indiv_tag==1
	gen `var'_sd= (`var'-r(mean))/r(sd)
}

gen procrastindex_3_v2= oftenlatetasks_3_sd + regrettaskssooner_3_sd + workbestlastmin_3_sd if !missing(oftenlatetasks_3) & !missing(regrettaskssooner_3) & !missing(workbestlastmin_3)
egen procrastindex_3_sd = std(procrastindex_3_v2)
sum procrastindex_3_v2

* Month of acceptance on procrastination.
* (stars are computed throughout this panel but the annotations print the coefficient without them)
reg accept_mo procrastindex_3_sd, robust 
			local b: display %4.3fc _b[procrastindex_3_sd]
			mat table=r(table)
			local p table[4,1]
			local stars "   "
			if `p'<0.1 {
				local stars "*  "
			}
			if `p'<0.05 {
				local stars "** "
			}
			if `p'<0.01 {
				local stars "***"
			}
local N = e(N)

# delimit ;
binscatter accept_mo procrastindex_3_sd, n(20) ylabel(-1.5(0.5)2.5)
xtitle("Procrastination Index (Standardized)") ytitle("Mean Month of Accepting An Offer")
text(2.2 1 "Coef: `b' ", place(e) size(small)) text(2.05 1.2 "N: `N' ", place(e) size(small)) graphregion(color(white));
graph save accept_timing_procrastinate, replace;

# delimit cr
* First-year earnings on procrastination.
reg first_total_nt procrastindex_3_sd, robust 
			local b: display %4.0fc _b[procrastindex_3_sd]
			mat table=r(table)
			local p table[4,1]
			local stars "   "
			if `p'<0.1 {
				local stars "*  "
			}
			if `p'<0.05 {
				local stars "** "
			}
			if `p'<0.01 {
				local stars "***"
			}
local N = e(N)

# delimit ;
binscatter first_total_nt procrastindex_3_sd, n(20) ylabel(50000(10000)80000)
xtitle("Procrastination Index (Standardized)") ytitle("Mean First Year Earnings")
text(78000 1 "Coef: `b' ", place(e) size(small)) text(77000 1.15 "N: `N' ", place(e) size(small)) graphregion(color(white));
graph save earnings_procrastinate, replace;

# delimit cr
graph combine accept_timing_procrastinate.gph earnings_procrastinate.gph, graphregion(color(white))
graph export "${figures}figureA11b.pdf", replace
* Remove the intermediate graph files once exported, as the earlier panels do.
erase accept_timing_procrastinate.gph
erase earnings_procrastinate.gph

** PATIENCE **

* Merge the two lowest categories of patience_3, then shift the scale down by one.
recode patience_3 (1/2=2), gen(patience)
replace patience = patience-1
tab patience

* Exploratory regressions (log output only).
reg patience female
reg accept_mo patience
reg accept_6mafter_grad patience
reg first_total_nt patience

* Month of acceptance on patience.
reg accept_mo patience, robust 
			local b: display %4.3fc _b[patience]
			mat table=r(table)
			local p table[4,1]
			local stars "   "
			if `p'<0.1 {
				local stars "*  "
			}
			if `p'<0.05 {
				local stars "** "
			}
			if `p'<0.01 {
				local stars "***"
			}
local N = e(N)

# delimit ;
binscatter accept_mo patience, n(20) ylabel(-1.5(0.5)2.5)
xtitle("Time Preferences: Patience (1-6)") ytitle("Mean Month of Accepting An Offer")
text(2.2 5 "Coef: `b' ", place(e) size(small)) text(2.05 5.18 "N: `N' ", place(e) size(small)) graphregion(color(white));
graph save accept_timing_patience, replace;

# delimit cr
* First-year earnings on patience.
reg first_total_nt patience, robust 
			local b: display %4.0fc _b[patience]
			mat table=r(table)
			local p table[4,1]
			local stars "   "
			if `p'<0.1 {
				local stars "*  "
			}
			if `p'<0.05 {
				local stars "** "
			}
			if `p'<0.01 {
				local stars "***"
			}
local N = e(N)

# delimit ;
binscatter first_total_nt patience, n(20) ylabel(50000(10000)80000)
xtitle("Time Preferences: Patience (1-6)") ytitle("Mean First Year Earnings")
text(78000 5 "Coef: `b' ", place(e) size(small)) text(77000 5.18 "N: `N' ", place(e) size(small)) graphregion(color(white));
graph save earnings_patience, replace;

# delimit cr
graph combine accept_timing_patience.gph earnings_patience.gph, graphregion(color(white))
graph export "${figures}figureA10b.pdf", replace
* Remove the intermediate graph files once exported, as the earlier panels do.
erase accept_timing_patience.gph
erase earnings_patience.gph

**********************************************************
* Table A1: Sample Sizes for Survey of "Current" Students
**********************************************************

* postg_sumstats.tex

	* Table A1: number of respondents in each combination of survey waves, written as a LaTeX table.
	use postg_analysis.dta, clear
	capture file close table
	file open table using "${figures}tableA1.tex", write replace

	* --- all three waves ---
	count if base_dummy==1 & postg_dummy==1 & mid_dummy==1
	local n_all : display %3.0f r(N)
	count if base_dummy==1 & postg_dummy==1 & mid_dummy==1 & cohort==2018
	local n_all_c18 : display %3.0f r(N)

	* --- pairs of waves ---
	count if base_dummy==1 & postg_dummy==1
	local n_1_3 : display %3.0f r(N)
	count if base_dummy==1 & mid_dummy==1
	local n_1_2 : display %3.0f r(N)
	count if postg_dummy==1 & mid_dummy==1
	local n_2_3 : display %3.0f r(N)

	* --- one wave but not the other ---
	count if base_dummy==1 & postg_dummy==0 
	local n_b : display %3.0f r(N)
	count if base_dummy==0 & postg_dummy==1 
	local n_p : display %3.0f r(N)

	* --- data availability ---
	count if !missing(actual_totalpay) & !missing(expected_totalpay)
	local n_ae : display %3.0f r(N)
	count if !missing(accept_mo) 
	local n_a : display %3.0f r(N)
	count if !missing(expected_totalpay)
	local n_e : display %3.0f r(N)

	* --- cohort sizes ---
	count if cohort==2018
	local n_2018 : display %3.0f r(N)
	count if cohort==2019
	local n_2019 : display %3.0f r(N)

	* Emit the table row by row; blank "& \\" rows separate the groups.
	file write table "\begin{table}[H]\caption{Sample Sizes for Survey of Current Students} \centering \begin{threeparttable} \begin{tabular}{lc} \toprule" _n
	file write table " & Number of Observations \\ \hline" _n
	file write table " & \\"_n
	file write table " Took All Three & `n_all'  \\" _n
	file write table " Took All Three, 2018 & `n_all_c18'  \\" _n

	file write table " & \\"_n
	file write table " Took Base and Post-Grad & `n_1_3'  \\" _n
	file write table " Took Base and Mid-Search & `n_1_2'  \\" _n
	file write table " Took Mid-Search and Post-Grad & `n_2_3'  \\" _n
	file write table " & \\"_n
	file write table " Took Base and NOT Post-Grad & `n_b'  \\" _n
	file write table " Took Post-Grad and NOT Base & `n_p'  \\" _n	
	file write table " & \\"_n
	file write table " Have Data on Baseline Expectations and Realizations & `n_ae'  \\" _n	
	file write table " Have Data on Baseline Expectations & `n_e'  \\" _n	
	file write table " Have Data on Realizations & `n_a'  \\" _n	
	file write table " & \\"_n
	file write table " 2018 Cohort & `n_2018'  \\" _n	
	file write table " 2019 Cohort & `n_2019'  \\" _n	
	file write table " & \\"_n
	file write table "\bottomrule" _n
	file write table "\end{tabular}"_n
	file write table "\begin{tablenotes} \item[] \footnotesize"_n
	file write table "\emph{Notes:} " _n
	file write table "\end{tablenotes} \end{threeparttable} \end{table}" _n
	file close table

************************************
* Table A2: Responses Across Waves
************************************

* responses_across_waves.tex

use postg_analysis.dta, clear

cap drop major

gen major = major_1
gen second_major = second_major_1
gen third_major = third_major_1

*Concentration Dummies
gen conc_acc = (major=="Accounting"|second_major=="Accounting"|third_major=="Accounting")
label var conc_acc "Concentration in Accounting, could be first, second, or third major"
replace conc_acc=. if major=="" & second_major=="" & third_major==""

gen conc_ent = (major=="Entrepreneurship"|second_major=="Entrepreneurship"|third_major=="Entrepreneurship")
label var conc_ent "Concentration in Entrepreneurship, could be first, second, or third major"
replace conc_ent=. if major=="" & second_major=="" & third_major==""

gen conc_fin = (major=="Finance"|second_major=="Finance"|third_major=="Finance")
label var conc_fin "Concentration in Finance, could be first, second, or third major"
replace conc_fin=. if major=="" & second_major=="" & third_major==""

gen conc_gen_mgt = (major=="General Management"|second_major=="General Management"|third_major=="General Management")
label var conc_gen_mgt "Concentration in General Management, could be first, second, or third major"
replace conc_gen_mgt=. if major=="" & second_major=="" & third_major==""

gen conc_int_mgt = (major=="International Management"|second_major=="International Management"|third_major=="International Management")
label var conc_int_mgt "Concentration in International Management, could be first, second, or third major"
replace conc_int_mgt=. if major=="" & second_major=="" & third_major==""

gen conc_law = (major=="Law"|second_major=="Law"|third_major=="Law")
label var conc_law "Concentration in Law, could be first, second, or third major"
replace conc_law=. if major=="" & second_major=="" & third_major==""

gen conc_mis = (major=="Management Information Systems"|second_major=="Management Information Systems"|third_major=="Management Information Systems")
label var conc_mis "Concentration in Management Information Systems, could be first, second, or third major"
replace conc_mis=. if major=="" & second_major=="" & third_major==""

gen conc_mkg = (major=="Marketing"|second_major=="Marketing"|third_major=="Marketing")
label var conc_mkg "Concentration in Marketing, could be first, second, or third major"
replace conc_mkg=. if major=="" & second_major=="" & third_major==""

gen conc_otm = (major=="Operations and Technology Management"|second_major=="Operations and Technology Management"|third_major=="Operations and Technology Management")
label var conc_otm "Concentration in Operations and Technology Management, could be first, second, or third major"
replace conc_otm=. if major=="" & second_major=="" & third_major==""

gen conc_ob = (major=="Organizational Behavior"|second_major=="Organizational Behavior"|third_major=="Organizational Behavior")
label var conc_ob "Concentration in Organizational Behavior, could be first, second, or third major"
replace conc_ob=. if major=="" & second_major=="" & third_major==""

replace second_major = "none" if second_major ==""
replace third_major = "none" if third_major ==""

	foreach race in "White" "Black" "American" "Hispanic" "Asian"{
		gen race_`race'_1 = strpos(race_1, "`race'")!=0 if !missing(race_1)
	}
	
	*Force 1 race per person
	foreach var of varlist race_White_1 race_Asian_1 race_Hispanic_1 race_American_1{
		replace `var'=0 if !missing(`var') & race_Black_1==1
	}
	foreach var of varlist race_White_1 race_Asian_1 race_Hispanic_1 {
		replace `var'=0 if !missing(`var') & race_American_1==1
	}
	foreach var of varlist race_White_1 race_Asian_1 {
		replace `var'=0 if !missing(`var') & race_Hispanic ==1
	}
	foreach var of varlist race_Asian_1 {
		replace `var'=0 if !missing(`var') & race_White_1 ==1
	}

	forval i=2018/2019{
	gen cohort`i'= cohort==`i'
	}
	
	*setup risk tolerance vars
	
	recode trait_risk_daily_1 (1=2), gen(trait_risk_daily2_1)
	replace trait_risk_daily2_1 = trait_risk_daily2_1 -1

	recode trait_risk_finance_1 (1=2), gen(trait_risk_finance2_1)
	replace trait_risk_finance2_1 = trait_risk_finance2_1 -1

	egen risk2_1 = rmean(trait_risk_daily2_1 trait_risk_finance2_1)
	replace risk2_1 = . if trait_risk_daily2_1==.
	
*i.cohort i.major_code i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa fa_ba mo_ba

sum female_1 age_1 us_born_1 gpa_1 conc_* race_*_1 us_born_1 fa_ba_1 mo_ba_1 expected_totalpay_1 confidence_1 risk2_1 if base_dummy==1

macro define variables "female_1 age_1 gpa_1 cohort2018 cohort2019 race_*_1 us_born_1 fa_ba_1 mo_ba_1 conc_*  risk2_1 confidence_1 expected_totalpay_1"

gen sample_baseline = base_dummy==1
gen sample_baseline_ffu = (base_dummy==1 & mid_dummy==1)
gen sample_baseline_final = (base_dummy==1 & postg_dummy==1)
gen sample_all3 = (base_dummy==1 & mid_dummy & postg_dummy==1)

* Write one temporary dataset per response pattern, tagged with a numeric id:
* 1 = baseline, 2 = baseline + mid, 3 = baseline + final, 4 = all three waves.
local sample_no = 0
foreach tag in baseline baseline_ffu baseline_final all3 {
	local ++sample_no
	preserve
	keep if sample_`tag'==1
	keep $variables
	gen sample=`sample_no'
	save sample_`tag', replace
	restore
}

* Stack the four subsamples (a respondent can appear in several of them)
use sample_baseline, clear
foreach tag in baseline_ffu baseline_final all3 {
	append using sample_`tag'
}

forvalues s = 1/4 {
	sum $variables if sample==`s'
}

* Comparison indicators: 0 = baseline stack, 1 = the follow-up stack, missing otherwise
forvalues s = 2/4 {
	recode sample (1=0)(`s'=1)(else=.), gen(sample`s')
}

set more off

* The first call creates (replaces) the pvalue_sample output; every later call appends a column.
regress female_1 sample2
outreg2 using pvalue_sample, bdec(3) p noparen excel replace

* For every characteristic, compare the baseline stack with each follow-up stack.
foreach v of varlist $variables {
	foreach cmp of varlist sample2 sample3 sample4 {
		regress `v' `cmp'
		outreg2 using pvalue_sample, bdec(3) p noparen excel append
	}
}

* Remove the temporary datasets.
* NOTE(review): sample_all3.dta is saved above but not erased here - confirm whether that is intended.
foreach f in sample_baseline sample_baseline_ffu sample_baseline_final {
	erase `f'.dta
}

/*** WRITE TABLE IN LATEX ***/

* Strip the baseline-wave suffix so the variable names match the label locals below
rename *_1 *

	capture file close table

	file open table using "${figures}tableA2.tex", write replace

	* Row labels: label1_* is the group heading (first column), label2_* the item name (second column)
	local label1_female "Female"
	local label1_age "Age"
	local label1_gpa "GPA"

	local label1_race_White "Race"
	local label2_race_White "White"
	local label2_race_Black "Black"
	local label2_race_American "American Indian"
	local label2_race_Hispanic "Hispanic"
	local label2_race_Asian "Asian"

	local label1_us_born "Born in U.S."
	local label1_fa_ba "Father BA+"
	local label1_mo_ba "Mother BA+"

	local label1_conc_acc "Concentration"
	local label2_conc_acc "Accounting"
	local label2_conc_ent "Entrepreneurship"
	local label2_conc_fin "Finance"
	local label2_conc_gen_mgt "General Management"
	local label2_conc_int_mgt "Intl Management"
	local label2_conc_law "Law"
	local label2_conc_mis "Mgmt Info. Systems"
	local label2_conc_mkg "Marketing"
	local label2_conc_otm "Ops. \& Tech Mgmt"
	local label2_conc_ob "Org Behavior"

	local label1_confidence "Perceived Rel. Ability (1-5)"
	local label1_risk2 "Risk Tolerance"

	local label1_cohort2018 "Cohort"
	local label2_cohort2018 "2018"
	local label2_cohort2019 "2019"

	* NOTE(review): the dollar sign followed by 1 below may be read by Stata as a global
	* macro reference and is an unescaped math shift in LaTeX - check the rendered label.
	local label1_expected_totalpay "Expected Total Pay ($1,000s)"

	* Variables reported as mean with standard deviation underneath
	local continuous_demos age gpa risk2 confidence expected_totalpay

	* Variables reported as a single proportion
	local bin_demos female race_White race_Black race_American race_Hispanic race_Asian us_born fa_ba mo_ba ///
					conc_acc conc_ent conc_fin conc_gen_mgt conc_int_mgt conc_law conc_mis conc_mkg conc_otm conc_ob ///
					cohort2018 cohort2019

	* Summary statistics for each of the four stacked samples
	foreach v of varlist `continuous_demos' {
		forvalues s = 1/4 {
			summarize `v' if sample == `s'
			local `v'_mean_`s': display %3.2f `r(mean)'
			local `v'_sd_`s': display %3.2f `r(sd)'
		}
	}

	foreach v of varlist `bin_demos' {
		forvalues s = 1/4 {
			summarize `v' if sample == `s'
			local `v'_mean_`s': display %4.3fc `r(mean)'
			local `v'_sd_`s': display %4.3fc `r(sd)'
		}
	}

	* Number of observations per sample
	forvalues s = 1/4 {
		count if sample == `s'
		local s`s'_N `r(N)'
	}

	* Emit the LaTeX table
	local fwt "file write table"
	`fwt' "\begin{table}[H]\caption{Responses Across Waves} \centering \footnotesize \begin{threeparttable} \begin{tabular}{rlcccc} \toprule" _n

	`fwt' " && Baseline & Baseline $+$ Mid  & Baseline $+$ Final & All Three \\ \hline" _n
	`fwt' " Observations && `s1_N' & `s2_N' & `s3_N' & `s4_N'  \\" _n

	* Proportions: one row per variable
	foreach var of varlist `bin_demos' {
		`fwt' "`label1_`var'' & `label2_`var'' & ``var'_mean_1' & ``var'_mean_2' & ``var'_mean_3' & ``var'_mean_4'  \\" _n
	}

	* Continuous variables: a mean row followed by a standard-deviation row in parentheses
	foreach var of varlist `continuous_demos' {
		`fwt' "`label1_`var'' & `label2_`var'' & ``var'_mean_1' & ``var'_mean_2' & ``var'_mean_3' & ``var'_mean_4'  \\" _n
		`fwt' "  &  & (``var'_sd_1') & (``var'_sd_2') & (``var'_sd_3') & (``var'_sd_4') \\" _n
	}

	`fwt' "\bottomrule" _n
	`fwt' "\end{tabular}"_n
	`fwt' "\begin{tablenotes} \item[] \footnotesize"_n
	`fwt' "Note: " _n
	`fwt' "\end{tablenotes} \end{threeparttable} \end{table}" _n
	file close table

********************************************************
* Table A3: Response Rates Based on Administrative Data
********************************************************

* PLEASE CONTACT AUTHORS FOR ADMIN DATA AND DO-FILE

********************************************************
* Table A4: Who Responded to the Surveys?
********************************************************

* PLEASE CONTACT AUTHORS FOR ADMIN DATA AND DO-FILE

********************************************************************************
* Table A5: Summary Statistics of All Respondents vs. Analysis Sample, By Gender
********************************************************************************

* table1_appendix.tex

	set more off
	use BU_grad_analysis_sample_aug2021.dta, clear

	* 1 if the respondent answered the mid or the post-graduation survey;
	* missing when both response flags are system missing
	generate mid_postg_dummy = (mid_dummy == 1 | postg_dummy == 1)
	replace mid_postg_dummy = . if mid_dummy == . & postg_dummy == .

	tab mid_postg_dummy if accepted==1 & cohort>=2018 // there are 10 students who have accepted jobs but only answered the baseline survey

	* Full sample: cohorts up to 2017, plus 2018/2019 respondents with a mid or
	* post-graduation response, plus anyone from 2018 onward who accepted a job
	generate full_sample = 1 if cohort <= 2017 ///
		| (inlist(cohort, 2018, 2019) & (postg_dummy == 1 | mid_dummy == 1)) ///
		| (cohort >= 2018 & accepted == 1)

	tab full_sample if accepted==1 //check that everyone who accepted a job is in the full sample

	* Gender differences in having accepted a job (need to insert this manually into the table)
	foreach fem of numlist 1 0 {
		sum accepted if full_sample==1 & female==`fem' & indiv_tag==1
	}
	reg accepted female if full_sample==1 & indiv_tag==1
	
	capture file close table

	file open table using "${figures}tableA5.tex", write replace

	* Indicator variables for each major code and each graduation cohort
	forvalues code = 1/11 {
		generate major`code' = (major_code == `code')
	}

	forvalues yr = 2013/2019 {
		generate cohort`yr' = (cohort == `yr')
	}

	*Force 1 race per person
	* Walk down the priority list (Black, American Indian, Latino, White, Asian/Pacific):
	* whenever the current race flag is 1, every lower-priority non-missing flag is set to 0.
	local ranked race_black race_am_india race_latino race_white race_asia_pacif
	while "`ranked'" != "" {
		gettoken top ranked : ranked
		foreach lower of local ranked {
			replace `lower' = 0 if !missing(`lower') & `top' == 1
		}
	}

	* Respondents flagged as missing race get missing on every race indicator
	foreach flag of varlist race_asia_pacif race_white race_latino race_am_india race_black {
		replace `flag' = . if miss_race == 1
	}

	*any debt
	generate any_debt = (student_debt != 0) if !missing(student_debt)

	* Rescale the money variables by a factor of 1,000
	foreach money of varlist student_debt parents_income {
		replace `money' = `money'/1000 if !missing(`money')
	}
	
	*Row labels
	* label1_* is the group heading written in the first table column,
	* label2_* the item name written in the second column.
	local label1_age "Age"
	
	local label1_gpa "GPA"
	
	local label1_race_white 			"Race"
	local label2_race_white 			"White/Caucasian"
	local label2_race_black 			"Black/ African American"
	local label2_race_am_india 			"American Indian"
	local label2_race_latino 			"Hispanic/ Latino"
	local label2_race_asia_pacif		"Asian/ Pacific Islander"

	local label1_us_born "Born in U.S."		
	local label1_fa_ba "Father BA+"
	local label1_mo_ba "Mother BA+"

	local label1_conc_acc 		"Concentration"
	local label2_conc_acc 		"Accounting"
	local label2_conc_ent 		"Entrepreneurship"
	local label2_conc_fin 		"Finance"
	local label2_conc_gen_mgt	"General Management"
	local label2_conc_int_mgt 	"International Management"
	local label2_conc_law 		"Law"
	local label2_conc_mis 		"Management Info. Systems"
	local label2_conc_mkg 		"Marketing"
	local label2_conc_otm 		"Operations \& Tech. Mgmt."
	local label2_conc_ob 		"Organizational Behavior"
	
	local label1_cohort "Graduation Year"

	*local label1_trait_confidence "Confidence in Ability"
	local label1_trait_confidence "Perceived Relative Ability (1-5)"
	
	local label1_trait_risk_daily "Risk Tolerance (1-7)"
	local label2_trait_risk_daily "Daily"
	local label2_trait_risk_finance "Financial"
	local label1_risk2 "Risk Tolerance"
	
	local label1_att_career_money "Career Values"
	local label2_att_career_money "Money"
	local label2_att_career_leader "Being a Leader"
	local label2_att_career_enjoy "Enjoying your Work"
	local label2_att_career_helpful "Helping Others"
	local label2_att_career_group "Working with Others"
	
	local label1_high_risk_daily "Percent High Risk ($\geq 6$)"
	local label1_high_risk2 "Percent High Risk ($\geq 5$)"
	local label2_high_risk_daily "Daily"
	local label2_high_risk_finance "Financial"
	
	local label1_over_conf "Percent Overconfident"
	
	* The group heading belongs on the first cohort row (2013) only.
	* The Table A2 section earlier in this do-file sets label1_cohort2018 to "Cohort"
	* and locals persist for the whole run, so clear it here; otherwise the 2018 row
	* of this table repeats the heading.
	local label1_cohort2018 ""
	local label1_cohort2013 "Cohort"
	local label2_cohort2013 "2013"
	local label2_cohort2014 "2014"
	local label2_cohort2015 "2015"
	local label2_cohort2016 "2016"
	local label2_cohort2017 "2017"
	local label2_cohort2018 "2018"
	local label2_cohort2019 "2019"
	
	*local label1_worked_after_grad "Ever Accept Job Offer"
	local label1_accepted "Accepted Job Offer to Work after Grad"
	
	local label1_student_debt "Student Debt(1,000s)$^{\dagger}$"
	local label1_any_debt "Proportion With Debt$^{\dagger}$"
	local label1_parents_income "Parent's Income (1,000s)$^{\dagger}$"
	
	local label1_procrastindex_3 "Procrastination Index (sd)$^{*}$"
	local label1_patience_3 "Patience (1-7)$^{*}$"
	local label1_oc1 "Exp-Real (pp)$^{*}$"

	*list of variables to show mean and stdev
	local continuous_demos age gpa risk2 trait_confidence
	
	*list of variables to show proportion as a percentage of population
	local bin_demos race_white race_black race_am_india race_latino race_asia_pacif us_born fa_ba mo_ba ///
					conc_acc conc_ent conc_fin conc_gen_mgt conc_int_mgt conc_law conc_mis conc_mkg conc_otm conc_ob ///
					accepted cohort2013 cohort2014 cohort2015 cohort2016 cohort2017 cohort2018 cohort2019 ///
					high_risk2 
	
	keep if indiv_tag==1 & full_sample==1

	* Duplicate every accepter. The second copy is recoded to accepted = 0, so that
	* accepted = 0 holds the whole full sample and accepted = 1 holds the accepters only.
	expand 2 if accepted==1
	sort RespondentID

	by RespondentID: generate count = cond(accepted == 1, _n, 1)
	replace accepted = 0 if count == 2

	tab count accepted

	* Interaction term used for the difference-in-gender-gaps p-value
	generate acceptedXgender = accepted*gender
	
	// Summary statistics by gender code (1 and 2) within each stacked sample.
	// Locals without a suffix refer to accepted = 0 (full sample), those with "2" to accepted = 1.
	// The p-value is the two-sided t-test on the acceptedXgender coefficient.

	foreach v of varlist `continuous_demos' {
		forvalues a = 0/1 {
			local tag = cond(`a' == 1, "2", "")
			forvalues g = 1/2 {
				summarize `v' if gender == `g' & accepted == `a'
				local `v'_mean`tag'_`g': display %3.2f `r(mean)'
				local `v'_sd`tag'_`g': display %3.2f `r(sd)'
			}
		}

		regress `v' acceptedXgender accepted gender
		local `v'_p: display %4.3f (2 * ttail(e(df_r), abs(_b[acceptedXgender]/_se[acceptedXgender])))
	}

	// Binary variables are reported in percent
	foreach v of varlist `bin_demos' {
		forvalues a = 0/1 {
			local tag = cond(`a' == 1, "2", "")
			forvalues g = 1/2 {
				summarize `v' if gender == `g' & accepted == `a'
				local `v'_mean`tag'_`g': display %3.1f `r(mean)'*100
				local `v'_sd`tag'_`g': display %3.1f `r(sd)'*100
			}
		}

		regress `v' acceptedXgender accepted gender
		local `v'_p: display %4.3f (2 * ttail(e(df_r), abs(_b[acceptedXgender]/_se[acceptedXgender])))
	}

	// Cell sizes: gender code 2 fills the Men columns, code 1 the Women columns
	forvalues a = 0/1 {
		local tag = cond(`a' == 1, "2", "")
		count if gender == 2 & accepted == `a'
		local m_N`tag' `r(N)'
		count if gender == 1 & accepted == `a'
		local f_N`tag' `r(N)'
	}

	// Emit the LaTeX table
	local fwt "file write table"
	`fwt' "\begin{table}[H]\caption{Summary Statistics of All Respondents vs. Analysis Sample, By Gender} \centering \footnotesize \begin{threeparttable} \begin{tabular}{rlcccccc} \toprule" _n
	`fwt' "&& \multicolumn{2}{c}{Full sample} & \multicolumn{2}{c}{Accepted} & \\" _n 
	`fwt' "\cmidrule(lr){3-4} \cmidrule(lr){5-6}" _n  			
	`fwt' " && Men & Women & Men & Women & p-value \\ \hline" _n
	`fwt' " Observations && `m_N' & `f_N' & `m_N2' & `f_N2' & \\" _n

	// Percent rows
	foreach var of varlist `bin_demos' {
		`fwt' "`label1_`var'' & `label2_`var'' &  ``var'_mean_2'\% & ``var'_mean_1'\% & ``var'_mean2_2'\% & ``var'_mean2_1'\%  & ``var'_p'  \\" _n
	}

	// Mean rows with standard deviations in parentheses underneath
	foreach var of varlist `continuous_demos' {
		`fwt' "`label1_`var'' & `label2_`var''& ``var'_mean_2' & ``var'_mean_1' & ``var'_mean2_2' & ``var'_mean2_1' & ``var'_p'  \\" _n
		`fwt' "  &  &  (``var'_sd_2') & (``var'_sd_1') & (``var'_sd2_2') & (``var'_sd2_1') & \\" _n
	}

	`fwt' "\bottomrule" _n
	`fwt' "\end{tabular}"_n
	`fwt' "\begin{tablenotes} \item[] \footnotesize"_n
	`fwt' "Note: The table compares the mean characteristics between the full sample of respondents and those who accepted a job by gender. The last column reports the p-value on a statistical test of the comparison of the gender difference in means between the two samples (full sample vs. accepted sample)." _n
	`fwt' "\end{tablenotes} \end{threeparttable} \end{table}" _n
	file close table

***********************************************
* Table A6: Gender Gap in Accepted Wage (Lab)
***********************************************

* regs_tab3_noround6.tex

* Load the lab data and attach the simulated-wage file
use temp_expt, clear
tab roundaccept

drop _merge
merge 1:1 id using sim_wage_data_small

tab roundaccept if roundaccept_sim==6
assert wageaccept==2 if roundaccept_sim==6	// there are 34 observations where no offer was accepted and outside wage of $2 was assigned

macro define base_controls "gpa_wmiss us_born_wmiss miss_gpa miss_us_born i.asuyear fa_educ_high_wmiss mo_educ_high_wmiss miss_fa_educ_high miss_mo_educ_high race_white_wmiss race_asian_wmiss miss_race_white miss_race_asian compeng_wmiss busecon_wmiss miss_compeng miss_busecon time_ce1_std time_ce2_std"

	est clear
	eststo clear

	* Eight columns: four sets of mechanism regressors, first without and then with
	* the background controls. Observations with roundaccept_sim equal to 6 are excluded.
	local extra1
	local extra2 risk_r_std
	local extra3 expprior_std
	local extra4 risk_r_std expprior_std
	forvalues withctrl = 0/1 {
		local ctrl
		if `withctrl' {
			local ctrl $base_controls
		}
		forvalues s = 1/4 {
			eststo: reg wageaccept female fast `extra`s'' 1.glitch `ctrl' if roundaccept_sim~=6, r
			estadd ysumm
		}
	}
	
	# delimit ;
	esttab * using "${figures}tableA6.tex", l stats(ymean r2 N, fmt(2 2 0) labels("Mean" "\(R^{2}\)" "N"))
	nomti se b(%4.3f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
	booktabs collabels(none) gaps nonotes title("")
	substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{19cm}})
	nobase addn("Note: The dependent variable is accepted wage. Controls include dummies for year of study, GPA, dummy for US-born, race dummies, dummy variables for college-graduate father/mother, separate indicator variables for majoring in engineering/computing and business/economics, and choices in the time preferences elicitation task. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level") 
	keep(female risk_r_std expprior_std fast) 
	indicate("Controls = gpa_wmiss", labels("X" ""))
	prehead(`"\begin{table}[htbp]\centering"' 
	`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
	`"\caption{Gender Gap in Accepted Wage (Lab)} \label{table:expt_gendergap_acceptedwage}"'
	`"\bigskip"'
	`"\begin{tabular}{l*{@M}{c}}"'
	`"\toprule"' 
	`"& & \multicolumn{7}{c}{Dependent Variable: Accepted Wage}  \\ \cline{2-9}"' )
	replace;	

#delimit cr
** p-value for the difference in coefficients across specifications **

	* Re-estimate the shortest and longest specifications without robust errors so that
	* suest can combine them; the test compares the female coefficient across the two.
	local insample "if roundaccept_sim~=6"
	local short "female fast 1.glitch"
	local long "female risk_r_std expprior_std fast 1.glitch"

	quietly regress wageaccept `short' `insample'
	estimates store e1
	quietly regress wageaccept `long' `insample'
	estimates store e2
	quietly regress wageaccept `short' $base_controls `insample'
	estimates store e3
	quietly regress wageaccept `long' $base_controls `insample'
	estimates store e4

	suest e1 e2, vce(robust)
	test [e1_mean]female = [e2_mean]female
	suest e3 e4, vce(robust)
	test [e3_mean]female = [e4_mean]female

**************************************************************************************************
* Table A7: Gender Gap in the Likelihood of Being in the Tails of the Wage Distribution (Lab)
**************************************************************************************************

* regs_tab4_combined.tex

use temp_expt, clear

macro define base_controls "gpa_wmiss us_born_wmiss miss_gpa miss_us_born i.asuyear fa_educ_high_wmiss mo_educ_high_wmiss miss_fa_educ_high miss_mo_educ_high race_white_wmiss race_asian_wmiss miss_race_white miss_race_asian compeng_wmiss busecon_wmiss miss_compeng miss_busecon time_ce1_std time_ce2_std"

** PANEL A **

* High-wage indicator. Missing values compare as larger than any number, so without
* the guard a missing accepted wage would be coded as a high wage.
gen wageaccept_high2 = wageaccept>=26 if !missing(wageaccept)

	* Columns 1-4: no background controls; columns 5-8: with the base controls
	est clear
	eststo clear
	eststo: reg wageaccept_high2 female fast 1.glitch, r
	estadd ysumm
	eststo: reg wageaccept_high2 female fast risk_r_std 1.glitch, r 
	estadd ysumm
	eststo: reg wageaccept_high2 female fast expprior_std 1.glitch, r 
	estadd ysumm
	eststo: reg wageaccept_high2 female fast risk_r_std expprior_std 1.glitch, r 
	estadd ysumm

	eststo: reg wageaccept_high2 female fast 1.glitch $base_controls, r 
	estadd ysumm
	eststo: reg wageaccept_high2 female fast risk_r_std 1.glitch $base_controls, r 
	estadd ysumm
	eststo: reg wageaccept_high2 female fast expprior_std 1.glitch $base_controls, r 
	estadd ysumm
	eststo: reg wageaccept_high2 female fast risk_r_std expprior_std 1.glitch $base_controls, r 
	estadd ysumm
	
	* NOTE(review): the table note below says "greater than 23" while the indicator above
	* uses a cutoff of 26 or more - confirm which threshold is intended.
	# delimit ;
	esttab * using "${figures}tableA7a.tex", l stats(ymean r2 N, fmt(2 2 0) labels("Mean" "\(R^{2}\)" "N"))
	nomti se b(%4.2f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
	booktabs collabels(none) gaps nonotes title("")
	substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{18cm}})
	nobase addn("Note: The dependent variable is a dummy for accepting a high wage (i.e., greater than 23). Controls include dummies for year of study, GPA, dummy for US-born, race dummies, dummy variables for college-graduate father/mother, separate indicator variables for majoring in engineering/computing and business/economics, and choices in the time preferences elicitation task. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level") 
	keep(female risk_r_std expprior_std fast) 
	indicate("Controls = gpa_wmiss", labels("X" ""))
	prehead(`"\begin{table}[htbp]\centering"' 
	`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
	`"\caption{Gender Gap in the Fraction of Students Who Accepted a High Wage (Lab)} \label{table:expt_gendergap_highwage}"'
	`"\bigskip"'
	`"\begin{tabular}{l*{@M}{c}}"'
	`"\toprule"' 
	`"& & \multicolumn{7}{c}{Dependent Variable: Accepted a High Wage}  \\ \cline{2-9}"' )
	replace;	

#delimit cr
** p-value for the difference in coefficients across specifications **
	
	* Non-robust re-estimates of the shortest and longest specifications, combined with
	* suest to test equality of the female coefficient across them
	quietly reg wageaccept_high2 female fast 1.glitch
	estimates store e1
	quietly reg wageaccept_high2 female risk_r_std expprior_std fast 1.glitch
	estimates store e2
	quietly reg wageaccept_high2 female fast 1.glitch $base_controls
	estimates store e3
	quietly reg wageaccept_high2 female risk_r_std expprior_std fast 1.glitch $base_controls
	estimates store e4

	quietly suest e1 e2, vce(robust)
	test [e1_mean]female = [e2_mean]female
	quietly suest e3 e4, vce(robust)
	test [e3_mean]female = [e4_mean]female

** PANEL B **

* Low-wage indicator (final wage of 5 or less). The guard leaves the indicator missing
* when the wage is missing, instead of coding such observations as 0.
capture drop wageaccept_low
gen wageaccept_low = wageaccept<=5 if !missing(wageaccept)

	* Columns 1-4: no background controls; columns 5-8: with the base controls
	est clear
	eststo clear
	eststo: reg wageaccept_low female fast 1.glitch, r
	estadd ysumm
	eststo: reg wageaccept_low female fast risk_r_std 1.glitch, r 
	estadd ysumm
	eststo: reg wageaccept_low female fast expprior_std 1.glitch, r 
	estadd ysumm
	eststo: reg wageaccept_low female fast risk_r_std expprior_std 1.glitch, r 
	estadd ysumm

	eststo: reg wageaccept_low female fast 1.glitch $base_controls, r 
	estadd ysumm
	eststo: reg wageaccept_low female fast risk_r_std 1.glitch $base_controls, r 
	estadd ysumm
	eststo: reg wageaccept_low female fast expprior_std 1.glitch $base_controls, r 
	estadd ysumm
	eststo: reg wageaccept_low female fast risk_r_std expprior_std 1.glitch $base_controls, r 
	estadd ysumm

	# delimit ;
	esttab * using "${figures}tableA7b.tex", l stats(ymean r2 N, fmt(2 2 0) labels("Mean" "\(R^{2}\)" "N"))
	nomti se b(%4.3f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
	booktabs collabels(none) gaps nonotes title("")
	substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{18cm}})
	nobase addn("Note: The dependent variable is a dummy for obtaining a low final wage (i.e., less than or equal to \$5). Controls include dummies for year of study, GPA, dummy for US-born, race dummies, dummy variables for college-graduate father/mother, separate indicator variables for majoring in engineering/computing and business/economics, and choices in the time preferences elicitation task. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level") 
	keep(female risk_r_std expprior_std fast) 
	indicate("Controls = gpa_wmiss", labels("X" ""))
	prehead(`"\begin{table}[htbp]\centering"' 
	`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
	`"\caption{Gender Gap in the Fraction of Students Who Obtained a Low Final Wage (Lab)} \label{table:expt_gendergap_lowwage}"'
	`"\bigskip"'
	`"\begin{tabular}{l*{@M}{c}}"'
	`"\toprule"' 
	`"& & \multicolumn{7}{c}{Dependent Variable: Obtained a Low Final Wage}  \\ \cline{2-9}"' )
	replace;	

#delimit cr
** p-value for the difference in coefficients across specifications **
	
	* Non-robust re-estimates of the shortest and longest specifications, combined with
	* suest to test equality of the female coefficient across them
	quietly reg wageaccept_low female fast 1.glitch
	estimates store e1
	quietly reg wageaccept_low female risk_r_std expprior_std fast 1.glitch
	estimates store e2
	quietly reg wageaccept_low female fast 1.glitch $base_controls
	estimates store e3
	quietly reg wageaccept_low female risk_r_std expprior_std fast 1.glitch $base_controls
	estimates store e4

	quietly suest e1 e2, vce(robust)
	test [e1_mean]female = [e2_mean]female
	quietly suest e3 e4, vce(robust)
	test [e3_mean]female = [e4_mean]female

*******************************************************************************************************
* Table A8: Gender Gap in the Likelihood of Obtaining a Final Wage Less than a Previously Offered Wage
*******************************************************************************************************

* regs_tab5_add_pvalue.tex

use temp_expt, clear

macro define base_controls "gpa_wmiss us_born_wmiss miss_gpa miss_us_born i.asuyear fa_educ_high_wmiss mo_educ_high_wmiss miss_fa_educ_high miss_mo_educ_high race_white_wmiss race_asian_wmiss miss_race_white miss_race_asian compeng_wmiss busecon_wmiss miss_compeng miss_busecon time_ce1_std time_ce2_std"

	est clear
	eststo clear

	* Eight columns: four regressor sets, first without and then with the base controls
	local spec1 "female fast 1.glitch"
	local spec2 "female risk_r_std fast 1.glitch"
	local spec3 "female expprior_std fast 1.glitch"
	local spec4 "female risk_r_std expprior_std fast 1.glitch"
	forvalues withctrl = 0/1 {
		local ctrl
		if `withctrl' {
			local ctrl $base_controls
		}
		forvalues s = 1/4 {
			eststo: reg wageoffer_miss `spec`s'' `ctrl', r
			estadd ysumm
		}
	}

	# delimit ;
	esttab * using "${figures}tableA8.tex", l stats(ymean r2 N, fmt(2 2 0) labels("Mean" "\(R^{2}\)" "N"))
	nomti se b(%4.3f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
	booktabs collabels(none) gaps nonotes title("")
	substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{19cm}})
	nobase addn("Note: The dependent variable is an indicator for accepting a final wage that is lower than a previously offered wage. Controls include dummies for year of study, GPA, dummy for US-born, race dummies, dummy variables for college-graduate father/mother, separate indicator variables for majoring in engineering/computing and business/economics, and choices in the time preferences elicitation task. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level") 
	keep(female risk_r_std expprior_std fast) 
	indicate("Controls = gpa_wmiss", labels("X" ""))
	prehead(`"\begin{table}[htbp]\centering"' 
	`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
	`"\caption{Gender Gap in the Likelihood of Accepting a Final Wage Less than a Previously Offered Wage (Lab)} \label{regs_tab5}"'
	`"\bigskip"'
	`"\begin{tabular}{l*{@M}{c}}"'
	`"\toprule"' 
	`"& & \multicolumn{7}{c}{Dependent Variable: Final Wage $<$ Previously Offered Wage}  \\ \cline{2-9}"' )
	replace;	

#delimit cr
** p-value for the difference in coefficients across specifications **

	* Shortest and longest specifications re-estimated without robust errors, then
	* combined with suest to test equality of the female coefficient
	local short "female fast 1.glitch"
	local long "female risk_r_std expprior_std fast 1.glitch"

	eststo clear
	eststo: reg wageoffer_miss `short'
	estadd ysumm
	estimates store e1

	eststo: reg wageoffer_miss `long'
	estadd ysumm
	estimates store e2

	eststo: reg wageoffer_miss `short' $base_controls
	estadd ysumm
	estimates store e3

	eststo: reg wageoffer_miss `long' $base_controls
	estadd ysumm
	estimates store e4

	suest e1 e2, vce(robust)
	test [e1_mean]female = [e2_mean]female
	suest e3 e4, vce(robust)
	test [e3_mean]female = [e4_mean]female

***********************************************
* Table A9: Qualification By Acceptance Month
***********************************************

* rejection_aversion.tex

	use postg_analysis.dta, clear

	rename propoverqual_3 over_qual
	rename proprightqual_3 right_qual
	rename propunderqual_3 under_qual
	
	* 1 if the job was accepted after graduation (accept_mo above 0), 0 if before,
	* defined for accepters only. Missing values compare as larger than any number,
	* so without the guard an accepter with missing accept_mo would be coded as 1.
	gen after_grad=accept_mo>0 if accepted==1 & !missing(accept_mo)
	
	cap file close table
	file open table using "${figures}tableA9.tex", write replace
	local fwt "file write table"
	`fwt' "\begin{table}[H]\caption{Qualification By Acceptance Month} \centering \begin{threeparttable} \begin{tabularx}{\textwidth}{lrlYYY} " _n
	`fwt' " & &  & Accept Before Grad & Accept After Grad & P-Value \\ \hline" _n
	
	gen male=gender==2 if !missing(gender)
	
	* Sample restriction appended to the variable name male for each panel:
	* 0 = everyone with known gender, 1 = men, 2 = women
	local gender_restrict_0 "!=."
	local gender_restrict_1 "==1"
	local gender_restrict_2 "==0"
	
	local label_0 "Both"
	local label_1 "Men"
	local label_2 "Women"
	
	forval g=0/2 {
		foreach type in over right under {
			* Group means before (0) and after (1) graduation
			forval t=0/1{
				quietly sum `type'_qual if after_grad==`t' & male`gender_restrict_`g''
				local `type'_`t'_mean: display %3.1f r(mean)
			}
			* Two-sample t-test of before versus after
			quietly ttest `type'_qual if male`gender_restrict_`g'', by(after_grad)
			local `type'_p: display %4.3f r(p)
			* N is overwritten on every pass, so the bracketed sample size written
			* below comes from the last t-test (under-qualified)
			local N: display r(N_1)+r(N_2)
		}
	`fwt' "  & & & & \\" _n	
	`fwt' "   \multirow{3}{*}{\shortstack[l]{`label_`g'' \\ $\left[`N'\right]$}}  & Prop. Apps. & Over Qualified & `over_0_mean' & `over_1_mean' & `over_p' \\" _n
	`fwt' "   & & Qualified & `right_0_mean' & `right_1_mean' & `right_p' \\" _n
	`fwt' "  & & Under Qualified & `under_0_mean' & `under_1_mean' & `under_p' \\" _n
	}
	`fwt' "   & & & \\ \hline" _n
	
	`fwt' "\end{tabularx}"_n
	`fwt' "\begin{tablenotes} \item[] \footnotesize"_n
	`fwt' "\emph{Notes:} Sample sizes in brackets." _n
	`fwt' "\end{tablenotes} \end{threeparttable} \end{table}" _n
	file close table

*****************************************************************
* Table A10: Experimental Sample Compared to the ASU Population
*****************************************************************

* PLEASE CONTACT AUTHORS FOR ASU ADMIN DATA AND DO-FILE

/**********************************************/
/* APPENDIX E Robustness of Empirical Results */
/**********************************************/

// E.I Using Logs vs. Levels for Earnings Outcomes //

*****************************************************************************************************
* Figure E1.A: Cumulative Mean Accepted Earnings and Gender Gap by Months Since Graduation (in Logs)
*****************************************************************************************************

* figure2_logs.pdf
* fact2_gap_logs.pdf

** PANEL A **

use BU_grad_analysis_sample_aug2021.dta, clear //reset dataset
* Keep accepted jobs with an acceptance month within 15 months of graduation
keep if accepted==1
keep if abs(accept_mo)<=15
	
gen male = 1-female
* Accepters with no industry recorded are assigned industry code 13
replace first_industry=13 if accepted==1 & missing(first_industry)

* Maternity or paternity benefit, defined only when BOTH components are observed
* (the second guard previously repeated the maternity variable)
gen first_benefit_matorpat= (first_benefit_maternity==1 | first_benefit_paternity==1) if !missing(first_benefit_maternity) & !missing(first_benefit_paternity)
	
* Missing-indicator method: flag missing values, then set them to 0
foreach var of varlist exp_earn_growth_1yr{
	gen `var'_miss=missing(`var')
	replace `var'=0 if missing(`var')
	}

** Unconditional **

cap program drop fig2_levels
program define fig2_levels, rclass
	* Cumulative mean of first_total_nt by acceptance month, separately by gender.
	* Rows 7 to 25 of the data in memory are used as scratch space for months -9 to 9
	* (mo equals the row number minus 16).
	* Creates: cum_mean0, cum_mean1 (female equal to 0 and 1), their standard errors, and mo.
	* Returns: r(slope_g0), r(slope_g1), the slopes of the cumulative means on mo.
	foreach v in cum_mean0 cum_mean1 cum_mean0_se cum_mean1_se mo {
		capture drop `v'
	}
	foreach v in cum_mean0 cum_mean1 cum_mean0_se cum_mean1_se {
		generate `v' = .
	}
	generate mo = _n - 16 if _n <= 32

	forvalues m = -9/9 {
		local row = `m' + 16
		forvalues g = 0/1 {
			* A constant-only regression gives the mean and its standard error
			regress first_total_nt if accepted==1 & accept_mo<=`m' & female==`g'
			replace cum_mean`g' = _b[_cons] in `row'
			replace cum_mean`g'_se = _se[_cons] in `row'
		}
	}

	forvalues g = 0/1 {
		regress cum_mean`g' mo
		return scalar slope_g`g' = _b[mo]
	}
end

cap program drop fig2_logs
program define fig2_logs, rclass
	* Log counterpart of fig2_levels: cumulative mean of log_first_total_nt by
	* acceptance month and gender, stored in rows 7 to 25 (months -9 to 9).
	* Creates: log_cum_mean0, log_cum_mean1, their standard errors, and mo.
	* Returns: r(log_slope_g0), r(log_slope_g1), the slopes of the cumulative means on mo.
	foreach v in log_cum_mean0 log_cum_mean1 log_cum_mean0_se log_cum_mean1_se mo {
		capture drop `v'
	}
	foreach v in log_cum_mean0 log_cum_mean1 log_cum_mean0_se log_cum_mean1_se {
		generate `v' = .
	}
	generate mo = _n - 16 if _n <= 32

	forvalues m = -9/9 {
		local row = `m' + 16
		forvalues g = 0/1 {
			* A constant-only regression gives the mean and its standard error
			regress log_first_total_nt if accepted==1 & accept_mo<=`m' & female==`g'
			replace log_cum_mean`g' = _b[_cons] in `row'
			replace log_cum_mean`g'_se = _se[_cons] in `row'
		}
	}

	forvalues g = 0/1 {
		regress log_cum_mean`g' mo
		return scalar log_slope_g`g' = _b[mo]
	}
end

** PLOTTING THE FIGURE **

set more off

// Build the cumulative-mean series (levels and logs)
fig2_levels
fig2_logs

// Slopes of each series on months since graduation, formatted for the figure note
forvalues g = 0/1 {
	regress cum_mean`g' mo
	local slope_g`g': display %4.1f _b[mo]
}
forvalues g = 0/1 {
	regress log_cum_mean`g' mo
	local log_slope_g`g': display %9.4f _b[mo]
}

// Confidence bands: mean plus or minus 1.96 standard errors
cap drop ub_* lb_* log_ub_* log_lb_*
forvalues g = 0/1 {
	generate ub_g`g' = cum_mean`g' + 1.96*cum_mean`g'_se
	generate lb_g`g' = cum_mean`g' - 1.96*cum_mean`g'_se
}
forvalues g = 0/1 {
	generate log_ub_g`g' = log_cum_mean`g' + 1.96*log_cum_mean`g'_se
	generate log_lb_g`g' = log_cum_mean`g' - 1.96*log_cum_mean`g'_se
}

// Log figure: shaded bands plus one line per gender. The stars in the note are typed in by hand.
twoway (rarea log_ub_g0 log_lb_g0 mo if mo>=-9 & mo<=9, lwidth(none) fcolor(navy%20)) ///
	(rarea log_ub_g1 log_lb_g1 mo if mo>=-9 & mo<=9, lwidth(none) fcolor(maroon%20)) ///
	(line log_cum_mean0 mo if mo>=-9 & mo<=9, lcolor(navy)) ///
	(line log_cum_mean1 mo if mo>=-9 & mo<=9, lcolor(maroon) lp(dash)), ///
	ylabel(10.8(0.1)11.4) xlabel(-9(3)9) ///
	graphregion(color(white)) ///
	xtitle("Months Since Graduation", size(medsmall)) ///
	ytitle("Cumulative Mean Log Accepted Offer" " ", size(medsmall)) ///
	legend(order(3 "Male" 4 "Female") size(medsmall)) name(figure2, replace) ///
	note("Male Slope: `log_slope_g0'***" "Female Slope: `log_slope_g1'***", ring(0) pos(2) size(small))
graph export "${figures}figureE1A_a.pdf", replace

** PANEL B **

** Residualizing **

* Nested control sets: each set extends the previous one.
*   controls1 = basic controls
*   controls2 = controls1 + industry fixed effects
*   controls3 = controls2 + job-benefit dummies and expected earnings growth
macro define controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
macro define controls2 "$controls1 i.first_industry"
macro define controls3 "$controls2 i.first_benefit_flexwork i.first_benefit_maternity i.first_benefit_paternity i.first_benefit_sickleave i.first_benefit_childcare exp_earn_growth_1yr exp_earn_growth_1yr_miss"

* controls *

forvalues k=1/3 {
	cap drop *_res`k'
}

set more off
* Residualize earnings (levels and logs) on each control set; the residuals
* are stored as <outcome>_res<k>.
forvalues k=1/3 {
	foreach y in first_total_nt log_first_total_nt {
		reg `y' ${controls`k'}
		predict `y'_res`k', res
	}
}

set more off 
cap program drop gap_raw
program gap_raw, rclass
	* Cumulative raw gender gap in accepted earnings (levels and logs).
	* For each month threshold m in -9..9, regresses the outcome on male among
	* accepted offers with accept_mo<=m and stores the male coefficient and its
	* SE in observation m+16 (where mo==m).
	* Returns r(slope_mo) and r(log_slope_mo): slope of each gap series on mo.

	cap drop cum_gap cum_gap_se
	cap drop log_cum_gap log_cum_gap_se
	foreach v in cum_gap cum_gap_se log_cum_gap log_cum_gap_se {
		gen `v'=.
	}
	cap drop mo
	gen mo=_n-16 if _n<=32

	* outcome j is paired with result series j
	local depvars first_total_nt log_first_total_nt
	local series cum_gap log_cum_gap
	forvalues j=1/2 {
		local y : word `j' of `depvars'
		local s : word `j' of `series'
		forvalues m=-9/9 {
			local row=`m'+16
			reg `y' male if accepted==1 & accept_mo<=`m'
			replace `s' = _b[male] in `row'
			replace `s'_se = _se[male] in `row'
		}
	}

	reg cum_gap mo
	return scalar slope_mo = _b[mo]

	reg log_cum_gap mo
	return scalar log_slope_mo = _b[mo]
end

set more off 
cap program drop gap_res1
program gap_res1, rclass
	* Cumulative gender gap in earnings residualized on control set 1
	* (variables *_res1), in levels and logs.
	* For each m in -9..9, regresses the residual on male among accepted offers
	* with accept_mo<=m; coefficient and SE go to observation m+16 (mo==m).
	* Returns r(slope_mo_res1) and r(log_slope_mo_res1).

	cap drop cum_gap_res1 cum_gap_res1_se
	cap drop log_cum_gap_res1 log_cum_gap_res1_se
	foreach v in cum_gap_res1 cum_gap_res1_se log_cum_gap_res1 log_cum_gap_res1_se {
		gen `v'=.
	}
	cap drop mo
	gen mo=_n-16 if _n<=32

	* outcome j is paired with result series j
	local depvars first_total_nt_res1 log_first_total_nt_res1
	local series cum_gap_res1 log_cum_gap_res1
	forvalues j=1/2 {
		local y : word `j' of `depvars'
		local s : word `j' of `series'
		forvalues m=-9/9 {
			local row=`m'+16
			reg `y' male if accepted==1 & accept_mo<=`m'
			replace `s' = _b[male] in `row'
			replace `s'_se = _se[male] in `row'
		}
	}

	reg cum_gap_res1 mo
	return scalar slope_mo_res1 = _b[mo]

	reg log_cum_gap_res1 mo
	return scalar log_slope_mo_res1 = _b[mo]
end

cap program drop gap_res2
program gap_res2, rclass
	* Cumulative gender gap in earnings residualized on control set 2
	* (variables *_res2), in levels and logs.
	* For each m in -9..9, regresses the residual on male among accepted offers
	* with accept_mo<=m; coefficient and SE go to observation m+16 (mo==m).
	* Returns r(slope_mo_res2) and r(log_slope_mo_res2).

	cap drop cum_gap_res2 cum_gap_res2_se
	cap drop log_cum_gap_res2 log_cum_gap_res2_se
	foreach v in cum_gap_res2 cum_gap_res2_se log_cum_gap_res2 log_cum_gap_res2_se {
		gen `v'=.
	}
	cap drop mo
	gen mo=_n-16 if _n<=32

	* outcome j is paired with result series j
	local depvars first_total_nt_res2 log_first_total_nt_res2
	local series cum_gap_res2 log_cum_gap_res2
	forvalues j=1/2 {
		local y : word `j' of `depvars'
		local s : word `j' of `series'
		forvalues m=-9/9 {
			local row=`m'+16
			reg `y' male if accepted==1 & accept_mo<=`m'
			replace `s' = _b[male] in `row'
			replace `s'_se = _se[male] in `row'
		}
	}

	reg cum_gap_res2 mo
	return scalar slope_mo_res2 = _b[mo]

	reg log_cum_gap_res2 mo
	return scalar log_slope_mo_res2 = _b[mo]
end

cap program drop gap_res3
program gap_res3, rclass
	* Cumulative gender gap in earnings residualized on control set 3
	* (variables *_res3), in levels and logs.
	* For each m in -9..9, regresses the residual on male among accepted offers
	* with accept_mo<=m; coefficient and SE go to observation m+16 (mo==m).
	* Returns r(slope_mo_res3) and r(log_slope_mo_res3).

	cap drop cum_gap_res3 cum_gap_res3_se
	cap drop log_cum_gap_res3 log_cum_gap_res3_se
	foreach v in cum_gap_res3 cum_gap_res3_se log_cum_gap_res3 log_cum_gap_res3_se {
		gen `v'=.
	}
	cap drop mo
	gen mo=_n-16 if _n<=32

	* outcome j is paired with result series j
	local depvars first_total_nt_res3 log_first_total_nt_res3
	local series cum_gap_res3 log_cum_gap_res3
	forvalues j=1/2 {
		local y : word `j' of `depvars'
		local s : word `j' of `series'
		forvalues m=-9/9 {
			local row=`m'+16
			reg `y' male if accepted==1 & accept_mo<=`m'
			replace `s' = _b[male] in `row'
			replace `s'_se = _se[male] in `row'
		}
	}

	reg cum_gap_res3 mo
	return scalar slope_mo_res3 = _b[mo]

	reg log_cum_gap_res3 mo
	return scalar log_slope_mo_res3 = _b[mo]
end

** PLOTTING THE FIGURE **

// getting the coefficients

set logtype text 
cap log close
log using fact2_gap, replace

set more off
* Fill in the cumulative gap series: raw, then residualized on control sets 1-3.
gap_raw
gap_res1
gap_res2
gap_res3

* Slopes of each log gap series on month, formatted for the figure note.
* (The res3 slope is computed but not shown in the figure below.)
reg log_cum_gap mo
local log_slope: display %9.4f _b[mo]
reg log_cum_gap_res1 mo
local log_slope_res1: display %9.4f _b[mo]
reg log_cum_gap_res2 mo
local log_slope_res2: display %9.4f _b[mo]
reg log_cum_gap_res3 mo
local log_slope_res3: display %9.4f _b[mo]

# delimit ;
graph tw line log_cum_gap mo if mo>=-9 & mo<=9, msize(small) lpattern(solid) || 
line log_cum_gap_res1 mo if mo>=-9 & mo<=9, msize(small) lpattern(longdash) ||
line log_cum_gap_res2 mo if mo>=-9 & mo<=9, msize(small) lpattern(dash_dot) 
ylabel(0(0.04)0.18, labsize(small)) xlabel(-9(3)9, labsize(small))
xtitle("Months Since Graduation", size(small)) ytitle("Cumulative Gender Gap in Residualized Log Earnings" "", size(small))
legend(label(1 "No Controls") label(2 "Basic Controls") label(3 "Basic Controls + Industry FE") size(small))
note("Slope (no controls): `log_slope'" "Slope (w/ basic controls): `log_slope_res1'*" 
"Slope (w/ basic controls + industry FE): `log_slope_res2'*", ring(0) pos(2) size(small))
graphregion(color(white));

graph export "${figures}figureE1A_b.pdf", replace;

# delimit cr

************************************************************************************
* Figure E1.B: Ex-Ante Log Reservation Earnings, Risk Preferences, and Overoptimism
************************************************************************************

* fig9_reservation_wage_logs.pdf

* Two binned scatterplots of ln_reservation_wage_1W (against risk2 and against ocW),
* each annotated with the OLS coefficient, significance stars and N, then combined
* into a single exported figure. Both use the sample weird~=1 & reservation_wage_1>=20000.
use temp_res_wages, clear

* Left panel: OLS with robust SEs; coefficient text, N and stars are kept in locals
* for the note() of the binscatter below.
* `p' stores the text "table[4,1]" (row 4 of r(table) is the p-value; column 1 is the
* first regressor), which is evaluated when the if-conditions below are expanded.
reg ln_reservation_wage_1W risk2 if weird~=1 & reservation_wage_1>=20000, robust
			local b: display %4.3fc _b[risk2]
			local N=e(N)
			mat table=r(table)
			local p table[4,1]
			local stars "   "
			if `p'<0.1 {
				local stars "*  "
			}
			if `p'<0.05 {
				local stars "** "
			}
			if `p'<0.01 {
				local stars "***"
			}
# delimit ;			
binscatter ln_reservation_wage_1W risk2 if weird~=1 & reservation_wage_1>=20000, 
xtitle("Willingness to Take Risk (Average)", size(medsmall)) ytitle("Log Ex-Ante Reservation Earnings", size(medsmall))
xlabel(1(1)6) ylabel(10.75(0.1)11.05) graphregion(color(white)) 
note("Coef: `b'`stars'" "N: `N'", ring(0) pos(2) size(small));
graph save log_reservation_wage_risk, replace;

# delimit cr
* Right panel: same steps with ocW as the regressor; the locals b, N, p and stars
* are overwritten.
reg ln_reservation_wage_1W ocW if weird~=1 & reservation_wage_1>=20000, robust
			local b: display %4.3fc _b[ocW]
			local N=e(N)
			mat table=r(table)
			local p table[4,1]
			local stars "   "
			if `p'<0.1 {
				local stars "*  "
			}
			if `p'<0.05 {
				local stars "** "
			}
			if `p'<0.01 {
				local stars "***"
			}
# delimit ;			
binscatter ln_reservation_wage_1W ocW if weird~=1 & reservation_wage_1>=20000, 
xtitle("Overoptimism: [(Expect - Realized)/Realized]*100%") ytitle("Log Ex-Ante Reservation Earnings", size(medsmall))
xlabel() ylabel() graphregion(color(white)) 
note("Coef: `b'`stars'" "N: `N'", ring(0) pos(2) size(small));
graph save log_reservation_wage_ocW, replace;

# delimit cr
* Combine the two saved panels, export the PDF, and delete the intermediate .gph files.
graph combine log_reservation_wage_risk.gph log_reservation_wage_ocW.gph, graphregion(color(white)) iscale(0.7)
graph export "${figures}figureE1B.pdf", replace
erase log_reservation_wage_risk.gph
erase log_reservation_wage_ocW.gph

*****************************************************************************************************
* Table E1.A: Relationship Between Cumulative Log Gender Earnings Gap and Month Since Graduation
*****************************************************************************************************

* fact2_table_logs.tex

* Reload the analysis sample and keep accepted offers whose acceptance month
* is within 15 months (either side) of graduation.
use BU_grad_analysis_sample_aug2021.dta, clear 
keep if accepted==1
keep if abs(accept_mo)<=15
	
gen male = 1-female
* Accepted offers with no recorded industry are assigned code 13 so that they
* are not dropped when i.first_industry enters a regression.
replace first_industry=13 if accepted==1 & missing(first_industry)

* Indicator for maternity OR paternity leave, defined only when BOTH benefit
* variables are observed. (Fix: the second condition previously re-tested
* first_benefit_maternity, so a missing paternity value was never screened out.)
gen first_benefit_matorpat= (first_benefit_maternity==1 | first_benefit_paternity==1) if !missing(first_benefit_maternity) & !missing(first_benefit_paternity)
	
* Missing-indicator treatment: flag missing values in <var>_miss, then zero-fill
* the variable itself so the observation is retained in regressions.
foreach var of varlist exp_earn_growth_1yr{
	gen `var'_miss=missing(`var')
	replace `var'=0 if missing(`var')
	}

** Residualizing **

* Nested control sets: each set extends the previous one.
*   controls1 = basic controls
*   controls2 = controls1 + industry fixed effects
*   controls3 = controls2 + job-benefit dummies and expected earnings growth
macro define controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
macro define controls2 "$controls1 i.first_industry"
macro define controls3 "$controls2 i.first_benefit_flexwork i.first_benefit_maternity i.first_benefit_paternity i.first_benefit_sickleave i.first_benefit_childcare exp_earn_growth_1yr exp_earn_growth_1yr_miss"

* controls *

forvalues k=1/3 {
	cap drop *_res`k'
}

set more off
* Residualize earnings (levels and logs) on each control set; the residuals
* are stored as <outcome>_res<k>.
forvalues k=1/3 {
	foreach y in first_total_nt log_first_total_nt {
		reg `y' ${controls`k'}
		predict `y'_res`k', res
	}
}

set more off 
cap program drop gap_raw
program gap_raw, rclass
	* Cumulative raw gender gap in accepted earnings (levels and logs).
	* For each month threshold m in -9..9, regresses the outcome on male among
	* accepted offers with accept_mo<=m and stores the male coefficient and its
	* SE in observation m+16 (where mo==m).
	* Returns r(slope_mo) and r(log_slope_mo): slope of each gap series on mo.

	cap drop cum_gap cum_gap_se
	cap drop log_cum_gap log_cum_gap_se
	foreach v in cum_gap cum_gap_se log_cum_gap log_cum_gap_se {
		gen `v'=.
	}
	cap drop mo
	gen mo=_n-16 if _n<=32

	* outcome j is paired with result series j
	local depvars first_total_nt log_first_total_nt
	local series cum_gap log_cum_gap
	forvalues j=1/2 {
		local y : word `j' of `depvars'
		local s : word `j' of `series'
		forvalues m=-9/9 {
			local row=`m'+16
			reg `y' male if accepted==1 & accept_mo<=`m'
			replace `s' = _b[male] in `row'
			replace `s'_se = _se[male] in `row'
		}
	}

	reg cum_gap mo
	return scalar slope_mo = _b[mo]

	reg log_cum_gap mo
	return scalar log_slope_mo = _b[mo]
end

set more off 
cap program drop gap_res1
program gap_res1, rclass
	* Cumulative gender gap in earnings residualized on control set 1
	* (variables *_res1), in levels and logs.
	* For each m in -9..9, regresses the residual on male among accepted offers
	* with accept_mo<=m; coefficient and SE go to observation m+16 (mo==m).
	* Returns r(slope_mo_res1) and r(log_slope_mo_res1).

	cap drop cum_gap_res1 cum_gap_res1_se
	cap drop log_cum_gap_res1 log_cum_gap_res1_se
	foreach v in cum_gap_res1 cum_gap_res1_se log_cum_gap_res1 log_cum_gap_res1_se {
		gen `v'=.
	}
	cap drop mo
	gen mo=_n-16 if _n<=32

	* outcome j is paired with result series j
	local depvars first_total_nt_res1 log_first_total_nt_res1
	local series cum_gap_res1 log_cum_gap_res1
	forvalues j=1/2 {
		local y : word `j' of `depvars'
		local s : word `j' of `series'
		forvalues m=-9/9 {
			local row=`m'+16
			reg `y' male if accepted==1 & accept_mo<=`m'
			replace `s' = _b[male] in `row'
			replace `s'_se = _se[male] in `row'
		}
	}

	reg cum_gap_res1 mo
	return scalar slope_mo_res1 = _b[mo]

	reg log_cum_gap_res1 mo
	return scalar log_slope_mo_res1 = _b[mo]
end

cap program drop gap_res2
program gap_res2, rclass
	* Cumulative gender gap in earnings residualized on control set 2
	* (variables *_res2), in levels and logs.
	* For each m in -9..9, regresses the residual on male among accepted offers
	* with accept_mo<=m; coefficient and SE go to observation m+16 (mo==m).
	* Returns r(slope_mo_res2) and r(log_slope_mo_res2).

	cap drop cum_gap_res2 cum_gap_res2_se
	cap drop log_cum_gap_res2 log_cum_gap_res2_se
	foreach v in cum_gap_res2 cum_gap_res2_se log_cum_gap_res2 log_cum_gap_res2_se {
		gen `v'=.
	}
	cap drop mo
	gen mo=_n-16 if _n<=32

	* outcome j is paired with result series j
	local depvars first_total_nt_res2 log_first_total_nt_res2
	local series cum_gap_res2 log_cum_gap_res2
	forvalues j=1/2 {
		local y : word `j' of `depvars'
		local s : word `j' of `series'
		forvalues m=-9/9 {
			local row=`m'+16
			reg `y' male if accepted==1 & accept_mo<=`m'
			replace `s' = _b[male] in `row'
			replace `s'_se = _se[male] in `row'
		}
	}

	reg cum_gap_res2 mo
	return scalar slope_mo_res2 = _b[mo]

	reg log_cum_gap_res2 mo
	return scalar log_slope_mo_res2 = _b[mo]
end

cap program drop gap_res3
program gap_res3, rclass
	* Cumulative gender gap in earnings residualized on control set 3
	* (variables *_res3), in levels and logs.
	* For each m in -9..9, regresses the residual on male among accepted offers
	* with accept_mo<=m; coefficient and SE go to observation m+16 (mo==m).
	* Returns r(slope_mo_res3) and r(log_slope_mo_res3).

	cap drop cum_gap_res3 cum_gap_res3_se
	cap drop log_cum_gap_res3 log_cum_gap_res3_se
	foreach v in cum_gap_res3 cum_gap_res3_se log_cum_gap_res3 log_cum_gap_res3_se {
		gen `v'=.
	}
	cap drop mo
	gen mo=_n-16 if _n<=32

	* outcome j is paired with result series j
	local depvars first_total_nt_res3 log_first_total_nt_res3
	local series cum_gap_res3 log_cum_gap_res3
	forvalues j=1/2 {
		local y : word `j' of `depvars'
		local s : word `j' of `series'
		forvalues m=-9/9 {
			local row=`m'+16
			reg `y' male if accepted==1 & accept_mo<=`m'
			replace `s' = _b[male] in `row'
			replace `s'_se = _se[male] in `row'
		}
	}

	reg cum_gap_res3 mo
	return scalar slope_mo_res3 = _b[mo]

	reg log_cum_gap_res3 mo
	return scalar log_slope_mo_res3 = _b[mo]
end

* levels *
cap program drop fig2_levels
program fig2_levels, rclass
	* Cumulative mean accepted earnings by gender, in levels and in logs.
	* For each month threshold m in -9..9 the mean among accepted offers with
	* accept_mo<=m is the constant of an intercept-only regression, run
	* separately for female==0 and female==1; estimate and SE are written to
	* observation m+16 (where mo==m).
	* Returns r(slope_g0), r(slope_g1), r(log_slope_g0), r(log_slope_g1):
	* slope of each stored series on mo.

	foreach v in cum_mean0 cum_mean1 cum_mean0_se cum_mean1_se {
		cap drop `v'
	}
	cap drop log_cum_mean0 log_cum_mean1 log_cum_mean0_se log_cum_mean1_se
	foreach v in cum_mean0 cum_mean1 cum_mean0_se cum_mean1_se log_cum_mean0 log_cum_mean1 log_cum_mean0_se log_cum_mean1_se {
		gen `v'=.
	}

	* Month index stored in the first 32 observations (-15..16).
	cap drop mo
	gen mo=_n-16 if _n<=32

	* outcome j is paired with result stub j
	local depvars first_total_nt log_first_total_nt
	local stubs cum_mean log_cum_mean
	forvalues j=1/2 {
		local y : word `j' of `depvars'
		local s : word `j' of `stubs'
		forvalues m=-9/9 {
			local row=`m'+16
			forvalues g=0/1 {
				reg `y' if accepted==1 & accept_mo<=`m' & female==`g'
				replace `s'`g' = _b[_cons] in `row'
				replace `s'`g'_se = _se[_cons] in `row'
			}
		}
	}

	forvalues g=0/1 {
		reg cum_mean`g' mo
		return scalar slope_g`g' = _b[mo]
	}
	forvalues g=0/1 {
		reg log_cum_mean`g' mo
		return scalar log_slope_g`g' = _b[mo]
	}

end

** PLOTTING THE FIGURE **

// getting the coefficients

set logtype text 
cap log close
log using fact2_gap, replace

set more off
* Run every series-building program once on the full sample.
foreach prog in fig2_levels gap_raw gap_res1 gap_res2 gap_res3 {
	`prog'
}

** bootstrap **

parallel setclusters 8

set more off
capture log close
set logtype text
log using "${figures}table4_tableE1A", replace

* NOTE(review): tempfolder is not defined in the visible part of this file;
* confirm the global is set before this point.
cd "${tempfolder}"

* Bootstrap the slope of each cumulative gap series on month: all four
* specifications in levels (slope_mo*), then all four in logs (log_slope_mo*).
* The suffix of the returned scalar is "" for gap_raw and "_resK" for gap_resK.
foreach scale in slope_mo log_slope_mo {
	foreach prog in gap_raw gap_res1 gap_res2 gap_res3 {
		local sfx = subinstr("`prog'", "gap_raw", "", 1)
		local sfx = subinstr("`sfx'", "gap_", "_", 1)
		parallel bs, exp(slope=r(`scale'`sfx')) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): `prog'
	}
}

log close

*****************************************************************************************************
* Table E1.B: Gender Gap in Log Reservation Earnings
*****************************************************************************************************

* reservation_gg_regs_v2_logs.tex

* Load the reservation-wage data. (Fix: -use- takes the clear option, not
* replace; this now matches the earlier load of the same file.)
use temp_res_wages, clear

* US-born indicator: taken from us_born_1; where that is missing and birthco_3
* is available, set from whether birthco_3 equals "United States".
gen us_born=us_born_1
replace us_born= birthco_3=="United States" if !missing(birthco_3) & missing(us_born)

gen gpa = gpa_1

* Concentration Dummies
* One indicator per concentration: equals 1 when the concentration name appears
* as the first, second, or third major (major_1, second_major_1, third_major_1).
local name_acc "Accounting"
local name_ent "Entrepreneurship"
local name_fin "Finance"
local name_gen_mgt "General Management"
local name_int_mgt "International Management"
local name_law "Law"
local name_mis "Management Information Systems"
local name_mkg "Marketing"
local name_otm "Operations and Technology Management"
local name_ob "Organizational Behavior"

foreach c in acc ent fin gen_mgt int_mgt law mis mkg otm ob {
	local full "`name_`c''"
	gen conc_`c' = inlist("`full'", major_1, second_major_1, third_major_1)
	label var conc_`c' "Concentration in `full', could be first, second, or third major_1"
}

* parental education

* Text response: the *_1 variable, falling back to the *_3 variable when missing.
foreach p in fa mo {
	gen `p'_edu=`p'_edu_1
	replace `p'_edu=`p'_edu_3 if missing(`p'_edu)
}

* Map the text responses (including free-text entries, matched exactly, trailing
* blanks included) to codes 1-8; anything unmatched or blank becomes code 9.
gen fa_educ = .
replace fa_educ = 1 if inlist(fa_edu, "Less than HS", "Did not finish high school", "MIDDLE SCHOOL", "My Father Has No Degrees", "no degree") ///
	| inlist(fa_edu, "middle school", "equivalent of middle school in ecuador ", "He only completed up to middle school ", "Did not finish high school ")
replace fa_educ = 2 if fa_edu == "High School Degree"
replace fa_educ = 3 if inlist(fa_edu, "Some College/Associate Degree", "Trade School", "CPA")
replace fa_educ = 4 if fa_edu == "Bachelor (B.A., B.S)"
replace fa_educ = 5 if inlist(fa_edu, "Medicine (M.D.)", "DDS", "B.S. and Pharm. D ", "DMD")
replace fa_educ = 6 if fa_edu == "Masters (M.A., M.S., M.F.A.)" | fa_edu == "MBA" | fa_edu == "JD and an MBA"
replace fa_educ = 7 if fa_edu == "Law (J.D.)"
replace fa_educ = 8 if fa_edu == "Doctorate (Ph.D)"
replace fa_educ = 9 if missing(fa_educ) | fa_edu == ""

gen mo_educ = .
replace mo_educ = 1 if inlist(mo_edu, "Less than HS", "Did not finish high school", "MIDDLE SCHOOL", "My mother Has No Degrees", "no degree", "middle school") ///
	| inlist(mo_edu, "equivalent of middle school in ecuador ", "He only completed up to middle school ", "Did not finish high school ", "Didn't complete high school", "GED")
replace mo_educ = 2 if mo_edu == "High School Degree"
replace mo_educ = 3 if inlist(mo_edu, "Some College/Associate Degree", "Trade School", "CPA")
replace mo_educ = 4 if mo_edu == "Bachelor (B.A., B.S)"
replace mo_educ = 5 if inlist(mo_edu, "Medicine (M.D.)", "Doctor of Veterinary Medicine", "DDS", "B.S. and Pharm. D ")
replace mo_educ = 6 if mo_edu == "Masters (M.A., M.S., M.F.A.)" | mo_edu == "MBA" | mo_edu == "JD and an MBA"
replace mo_educ = 7 if mo_edu == "Law (J.D.)"
replace mo_educ = 8 if mo_edu == "Doctorate (Ph.D)"
replace mo_educ = 9 if missing(mo_educ) | mo_edu == ""

* race
	* Search string for each race group. The loop below looks these up as
	* <group>_str, so every local must carry the _str suffix.
	* (Fix: the Asian entry was named asia_pacif, without _str, so the lookup
	* expanded to an empty search string and race_asia_pacif never reflected
	* the race text.)
	* NOTE(review): "American" will also match any response that merely contains
	* that word - confirm against the actual race_1/race_3 categories.
	local white_str "White"
	local black_str "Black"
	local am_india_str "American"
	local latino_str "Hispanic"
	local asia_pacif_str "Asian"
	foreach race in "white" "black" "am_india" "latino" "asia_pacif"{
		* 1 if the search string occurs in race_1; race_3 is used when race_1 is
		* missing; 0 when neither is available.
		gen race_`race' = strpos(race_1, "``race'_str'")!=0 if !missing(race_1)
		replace race_`race' = strpos(race_3, "``race'_str'")!=0 if !missing(race_3) & missing(race_1)
		replace race_`race'=0 if missing(race_`race')
	}
	
	* Flag respondents with no race information in either variable.
	gen miss_race=missing(race_1) & missing(race_3)

rename female_1 female

* Missing-indicator treatment for ocW: ocW_miss flags missing values and
* ocW_wmiss carries ocW with missing values replaced by the placeholder 999.
gen ocW_miss = ocW==.
capture drop ocW_wmiss
gen ocW_wmiss = ocW
replace ocW_wmiss = 999 if ocW==.

	set more off 
	macro define controls1 "i.cohort_1 conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
	
	* Eight specifications of log reservation earnings on female, all on the same
	* sample (weird~=1, reservation_wage_1>=20000, risk2 non-missing):
	* columns 1-4 without basic controls, columns 5-8 with $controls1; within each
	* group: no extra regressor, risk2, the ocW pair, and risk2 plus the ocW pair.
	est clear
	eststo clear
	eststo: reg ln_reservation_wage_1W female if weird~=1 & reservation_wage_1>=20000 & risk2~=., robust
	estadd ysumm
	eststo: reg ln_reservation_wage_1W female risk2 if weird~=1 & reservation_wage_1>=20000 & risk2~=., robust
	estadd ysumm
	eststo: reg ln_reservation_wage_1W female ocW_wmiss ocW_miss  if weird~=1 & reservation_wage_1>=20000 & risk2~=., robust
	estadd ysumm
	eststo: reg ln_reservation_wage_1W female risk2 ocW_wmiss ocW_miss if weird~=1 & reservation_wage_1>=20000 & risk2~=., robust
	estadd ysumm
	eststo: reg ln_reservation_wage_1W female $controls1 if weird~=1 & reservation_wage_1>=20000 & risk2~=., robust
	estadd ysumm
	eststo: reg ln_reservation_wage_1W female $controls1 risk2 if weird~=1 & reservation_wage_1>=20000 & risk2~=., robust
	estadd ysumm
	eststo: reg ln_reservation_wage_1W female $controls1 ocW_wmiss ocW_miss if weird~=1 & reservation_wage_1>=20000 & risk2~=., robust
	estadd ysumm
	eststo: reg ln_reservation_wage_1W female $controls1 risk2 ocW_wmiss ocW_miss if weird~=1 & reservation_wage_1>=20000 & risk2~=., robust
	estadd ysumm	

	# delimit ;
	esttab * using "${figures}tableE1B.tex", l stats(ymean r2 N, fmt(3 3 0) labels("Mean" "\(R^{2}\)" "N"))  title("Gender Gap in Log Reservation Earnings") 
	nomti se b(%4.3f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
	booktabs collabels(none) gaps
	nobase addn( "Note: The dependent variable is the natural log of ex-ante reservation earnings (in 2017 dollars). Basic controls include cohort fixed effects, major fixed effects, 
	GPA, dummy for US-born, and fixed effects for race, father's education, and mother's education. 
	Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level.") 
	keep(female risk2 ocW_wmiss ocW_miss) 
	indicate("Controls = gpa", labels("X" "")) replace;	
	# delimit cr

* Fix: carriage-return delimiting is restored right after the esttab command, so
* the comment below is no longer read in semicolon mode without a terminator.
	
** p-value for the difference in coefficients across specifications **

* The models are re-estimated without the robust option and stored by name;
* suest then combines each pair with vce(robust) and the female coefficients
* are tested for equality across the two models.
 eststo clear
	eststo: reg ln_reservation_wage_1W female if weird~=1 & reservation_wage_1>=20000 & risk2~=.
	estadd ysumm
	estimates store m1
	
 eststo: reg ln_reservation_wage_1W female risk2 ocW_wmiss ocW_miss if weird~=1 & reservation_wage_1>=20000 & risk2~=.
	estadd ysumm
	estimates store m2
	
	eststo: reg ln_reservation_wage_1W female $controls1 if weird~=1 & reservation_wage_1>=20000 & risk2~=.
	estadd ysumm
	estimates store m3
	
	eststo: reg ln_reservation_wage_1W female $controls1 risk2 ocW_wmiss ocW_miss if weird~=1 & reservation_wage_1>=20000 & risk2~=.
	estadd ysumm
	estimates store m4
	
suest m1 m2, vce(robust)
test [m1_mean]female = [m2_mean]female
suest m3 m4, vce(robust)
test [m3_mean]female = [m4_mean]female

**********************************************************************************************************
* Table E1.C: Gender Gap in Log Earnings, Controlling for Risk Preferences and a Proxy for Biased Beliefs
**********************************************************************************************************

* accepted_earnings_add_oc_logs_combined.tex

* Reload the analysis sample, keeping accepted offers only.
use BU_grad_analysis_sample_aug2021.dta, clear 
keep if accepted==1

* oc: percentage by which expected total pay exceeds accepted first-year pay,
* defined only when both are observed.
gen oc=(expected_totalpay-first_total_nt)/first_total_nt*100 if !missing(expected_totalpay) & !missing(first_total_nt)

* NOTE(review): winsor is not a built-in Stata command; presumably the user-written package - confirm it is installed.
winsor oc, gen(ocW) p(0.025)		//winsorizing top and bottom 2.5% 

* Accepted offers with no recorded industry are assigned code 13.
replace first_industry=13 if accepted==1 & missing(first_industry)
* NOTE(review): controls1 is defined again, with identical content, a few lines below.
macro define controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"

* Missing weekly hours are recoded to the placeholder 999 and flagged in offer_weekly_hrs_miss.
* NOTE(review): after the recode no value equals ., so the last replace below appears to be a no-op.
replace offer_weekly_hrs = 999 if offer_weekly_hrs==.
gen offer_weekly_hrs_miss = offer_weekly_hrs==999
replace offer_weekly_hrs_miss=. if offer_weekly_hrs==.

* Control sets. NOTE(review): the visible regressions for this table reference only $controls1.
macro define controls0 ""
macro define controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
macro define controls2 "i.first_industry i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ" 
macro define controls3 "i.first_industry i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ startsearch_mo" 

* Variable labels (esttab is called with the label option, so these appear in the tables).
label var expected_totalpay "Expected Total Compensation"
label var trait_confidence "Perceived Relative Ability (1-5)"
label var female "Female"
label var risk2 "Risk Tolerance"

gen log_expected_totalpay = ln(expected_totalpay)
label var log_expected_totalpay "Log Expected Total Compensation"

eststo clear

* Shared pieces of the eight specifications.
*   smp  : estimation sample (expected total pay observed)
*   addl : additional controls used in columns 5-8
*   rhsJ : regressors added next to female in specification J
local smp "if expected_totalpay~=."
local addl "i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss"
local rhs1 ""
local rhs2 "risk2"
local rhs3 "log_expected_totalpay"
local rhs4 "risk2 log_expected_totalpay"

* Columns 1-4: basic controls only.
forvalues j=1/4 {
	eststo: reg log_first_total_nt female `rhs`j'' $controls1 `smp', robust
	estadd ysumm
}

* Columns 5-8: basic plus additional controls, estimated through xi:.
forvalues j=1/4 {
	eststo: xi: reg log_first_total_nt female `rhs`j'' $controls1 `addl' `smp', robust
	estadd ysumm
}

# delimit ;
esttab * using "${figures}tableE1C_b.tex", l stats(ymean r2 N, fmt(2 3 0) labels("Mean" "\(R^{2}\)" "N"))  
nomti se b(%4.3f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
booktabs collabels(none) gaps nonotes title("")
substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{16cm}})
nobase addn( "Note: The dependent variable is the natural log of total accepted earnings in the first year (in 2017 dollars). Basic controls include cohort fixed effects, major fixed effects, 
GPA, dummy for US-born, and fixed effects for race, father's education, and mother's education. 
Additional controls include fixed effects for industry (19 groups), dummies for the location of the first job (country/state), and weekly hours of work. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level.") 
keep(female risk2 log_expected_totalpay) 
indicate("Controls = gpa" "Add. controls = _Ifirst_loc_2", labels("X" ""))
prehead(`"\begin{table}[htbp]\centering"' `"\footnotesize"'
`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
`"\caption{Gender Gap in Accepted Earnings}"'
`"\bigskip"'
`"\begin{tabular}{l*{@M}{c}}"'
`"\toprule"' )
replace;

***************** CHECK IF COEFFICIENTS ARE STATISTICALLY DIFFERENT *************;
		
#delimit cr

* The baseline and the full specification of each group are re-estimated without
* the robust option and stored by name; suest then combines each pair with
* vce(robust) and the female coefficients are tested for equality.
eststo: reg log_first_total_nt female $controls1 `smp'
estimates store a1

eststo: reg log_first_total_nt female `rhs4' $controls1 `smp'
estimates store a2

eststo: xi: reg log_first_total_nt female $controls1 `addl' `smp'
estimates store a3

eststo: xi: reg log_first_total_nt female `rhs4' $controls1 `addl' `smp'
estimates store a4

foreach pair in "a1 a2" "a3 a4" {
	local first : word 1 of `pair'
	local second : word 2 of `pair'
	suest `pair', vce(robust)
	test [`first'_mean]female = [`second'_mean]female
}

/*2nd Proxy*/

est clear
eststo clear
eststo: reg log_first_total_nt female $controls1, robust
estadd ysumm
eststo: reg log_first_total_nt female risk2 $controls1, robust
estadd ysumm
eststo: reg log_first_total_nt female trait_confidence $controls1, robust
estadd ysumm
eststo: reg log_first_total_nt female trait_confidence risk2 $controls1, robust
estadd ysumm
eststo: xi: reg log_first_total_nt female $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss, robust
estadd ysumm
eststo: xi: reg log_first_total_nt female risk2 $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss, robust
estadd ysumm
eststo: xi: reg log_first_total_nt female trait_confidence $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss, robust
estadd ysumm
eststo: xi: reg log_first_total_nt female risk2 trait_confidence $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss, robust
estadd ysumm

# delimit ;
esttab * using "${figures}tableE1C_a.tex", l stats(ymean r2 N, fmt(2 3 0) labels("Mean" "\(R^{2}\)" "N"))  
nomti se b(%4.3f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
booktabs collabels(none) gaps nonotes title("")
substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{16cm}})
nobase addn( "Note: The dependent variable is the natural log of total accepted earnings in the first year (in 2017 dollars). Basic controls include cohort fixed effects, major fixed effects, 
GPA, dummy for US-born, and fixed effects for race, father's education, and mother's education. 
Additional controls include fixed effects for industry (19 groups), dummies for the location of the first job (country/state), and weekly hours of work. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level.") 
keep(female risk2 trait_confidence) 
indicate("Controls = gpa" "Add. controls = _Ifirst_loc_2", labels("X" ""))
prehead(`"\begin{table}[htbp]\centering"' `"\footnotesize"'
`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
`"\caption{Gender Gap in Accepted Earnings}"'
`"\bigskip"'
`"\begin{tabular}{l*{@M}{c}}"'
`"\toprule"' )
replace;

#delimit cr
	
* Test whether the female coefficient changes once trait_confidence and risk2 are added.
* The four models are re-estimated here without the robust option and stored as b1-b4;
* the robust variance is requested at the suest step through vce(robust).
* b1/b2: basic controls only; b3/b4: plus industry, location and weekly-hours controls.
eststo: reg log_first_total_nt female $controls1
estimates store b1
	
eststo: reg log_first_total_nt female trait_confidence risk2 $controls1
estimates store b2
	
eststo: xi: reg log_first_total_nt female $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss
estimates store b3
	
eststo: xi: reg log_first_total_nt female risk2 trait_confidence $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss
estimates store b4
	
* Cross-model Wald tests of equality of the female coefficient.
suest b1 b2, vce(robust)
test [b1_mean]female = [b2_mean]female	
suest b3 b4, vce(robust)
test [b3_mean]female = [b4_mean]female	

// E.II Omitting Earlier (2013 to 2015) Cohorts //

*********************************************************
* Figure E1I.A: CDF of Job Acceptance Timing, By Gender
*********************************************************

* figure1_2016to2019.pdf

	* Empirical CDF of the job-acceptance month by gender, cohorts 2016 and later.
	use BU_grad_analysis_sample_aug2021.dta, clear 
	keep if cohort>=2016
	
	* Top- and bottom-code the acceptance month at +/-9 (missing values are left missing).
	replace accept_mo = -9 if accept_mo<-9 & accept_mo~=.
	replace accept_mo = 9 if accept_mo>9 & accept_mo~=. 
	
	ksmirnov accept_mo if accepted==1, by(gender) //KS test for month of accept by gender
	* Keep the KS p-value for the figure note; it must be read before collapse replaces the data.
	local ks_p: display %4.3f r(p)
	
	* One row per (month, gender): mean accepted earnings and number of non-missing earnings.
	collapse (mean) mean=first_total_nt (count) num=first_total_nt if accepted==1, by(accept_mo gender)
	
	* Running totals within gender give cumulative counts and cumulative mean earnings.
	sort gender accept_mo
	gen raw_sum=mean*num
	by gender: gen cum_num= sum(num)
	by gender: gen cum_sum= sum(raw_sum)
	gen cum_mean=cum_sum/cum_num
		
	* Wide layout: suffix 1 / 2 is the value of gender.
	* NOTE(review): the legend below labels the gender==2 series "Male" - confirm the coding of gender.
	reshape wide mean num raw_sum cum_num cum_sum cum_mean, i(accept_mo) j(gender)
	gen gender_gap=cum_mean2-cum_mean1
	gen gender_gap_perc=(cum_mean2-cum_mean1)/cum_mean1
		
	* These two locals are not referenced again in this block.
	local dig_gender_gap_perc 4.3
	local dig_gender_gap 5.1
	
	* Share of each gender's acceptances that occurred by each month.
	egen total_num1 = max(cum_num1)
	egen total_num2 = max(cum_num2)
	
	gen prop1=cum_num1/total_num1
	gen prop2=cum_num2/total_num2
		
	twoway  (line prop2 accept_mo if accept_mo>=-9 & accept_mo<=9, lwidth(medthick)) (line prop1 accept_mo if accept_mo>=-9 & accept_mo<=9, lp(dash) lwidth(medthick)) ///
	, graphregion(color(white))  ///
	xtitle("Months Since Graduation", size(small)) ytitle("Proportion Accepted a Job", size(small)) ///
	legend(label(1 "Male") label(2 "Female") size(small) rows(1) ) ylabel(, labsize(small)) xlabel(-9(3)9, labsize(small)) 	///
	note("KS p-val: `ks_p'", ring(0) pos(4) size(small))  
	
	graph export "${figures}figureE2A.pdf", replace

*********************************************************************************************
* Figure E1I.B: Cumulative Mean Accepted Earnings and Gender Gap by Months Since Graduation
*********************************************************************************************

* figure2_levels_2016to2019.pdf
* fact2_gap_levels_2016to2019.pdf

* Analysis sample for Figure E2B: cohorts 2016 and later, accepted offers,
* acceptance within 15 months of graduation (either side).
use BU_grad_analysis_sample_aug2021.dta, clear 
keep if cohort>=2016
keep if accepted==1
keep if abs(accept_mo)<=15
	
gen male = 1-female
* Missing industry is assigned code 13 so these observations stay in the industry fixed effects.
replace first_industry=13 if accepted==1 & missing(first_industry)

* Indicator for maternity OR paternity leave, defined only when BOTH benefit items are observed.
* Fix: the original qualifier tested first_benefit_maternity twice and never checked paternity.
gen first_benefit_matorpat= (first_benefit_maternity==1 | first_benefit_paternity==1) if !missing(first_benefit_maternity) & !missing(first_benefit_paternity)
	
* Missing-indicator approach: flag missing expected earnings growth, then set it to 0.
foreach var of varlist exp_earn_growth_1yr{
	gen `var'_miss=missing(`var')
	replace `var'=0 if missing(`var')
	}

** Unconditional **

capture program drop fig2_levels
program define fig2_levels, rclass
	* Cumulative mean of first_total_nt by gender for each cutoff month -9..9.
	* Writes cum_mean0/cum_mean1 (female==0/1) and their standard errors into rows 7..25,
	* where the helper variable mo equals the cutoff month.
	* Returns r(slope_g0) and r(slope_g1): slopes of the cumulative means on mo.
	foreach out in cum_mean0 cum_mean1 cum_mean0_se cum_mean1_se {
		capture drop `out'
		generate `out' = .
	}

	* Row index recentred so that row 16 corresponds to month 0.
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	forvalues cutoff = -9/9 {
		local row = `cutoff' + 16
		forvalues g = 0/1 {
			* Constant-only regression: the intercept is the mean among offers accepted by the cutoff.
			regress first_total_nt if accepted==1 & accept_mo<=`cutoff' & female==`g'
			replace cum_mean`g' = _b[_cons] in `row'
			replace cum_mean`g'_se = _se[_cons] in `row'
		}
	}

	forvalues g = 0/1 {
		regress cum_mean`g' mo
		return scalar slope_g`g' = _b[mo]
	}
end

capture program drop fig2_logs
program define fig2_logs, rclass
	* Log counterpart of fig2_levels: cumulative mean of log_first_total_nt by gender
	* for each cutoff month -9..9, stored in log_cum_mean0/1 (plus _se) in rows 7..25.
	* Returns r(log_slope_g0) and r(log_slope_g1).
	foreach out in log_cum_mean0 log_cum_mean1 log_cum_mean0_se log_cum_mean1_se {
		capture drop `out'
		generate `out' = .
	}

	* Row index recentred so that row 16 corresponds to month 0.
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	forvalues cutoff = -9/9 {
		local row = `cutoff' + 16
		forvalues g = 0/1 {
			* Intercept of a constant-only regression = mean among offers accepted by the cutoff.
			regress log_first_total_nt if accepted==1 & accept_mo<=`cutoff' & female==`g'
			replace log_cum_mean`g' = _b[_cons] in `row'
			replace log_cum_mean`g'_se = _se[_cons] in `row'
		}
	}

	forvalues g = 0/1 {
		regress log_cum_mean`g' mo
		return scalar log_slope_g`g' = _b[mo]
	}
end

** PLOTTING THE FIGURE **

* Panel A of Figure E2B: cumulative mean accepted earnings by gender with 95% bands.
set more off 
set logtype text
cap log close

// getting the coefficients
* Each call fills the cum_mean* / log_cum_mean* columns and the mo axis in the data.
fig2_levels
fig2_logs

* Re-run the slope regressions to format the slopes for the figure note.
reg cum_mean0 mo
local slope_g0: display %4.1f _b[mo]
reg cum_mean1 mo
local slope_g1: display %4.1f _b[mo]

reg log_cum_mean0 mo
local log_slope_g0: display %9.4f _b[mo]
reg log_cum_mean1 mo
local log_slope_g1: display %9.4f _b[mo]

* Pointwise bands: estimate +/- 1.96 standard errors.
cap drop ub_* lb_* log_ub_* log_lb_*
gen ub_g0 = cum_mean0 + 1.96*cum_mean0_se
gen lb_g0 = cum_mean0 - 1.96*cum_mean0_se

gen ub_g1 = cum_mean1 + 1.96*cum_mean1_se
gen lb_g1 = cum_mean1 - 1.96*cum_mean1_se

gen log_ub_g0 = log_cum_mean0 + 1.96*log_cum_mean0_se
gen log_lb_g0 = log_cum_mean0 - 1.96*log_cum_mean0_se

gen log_ub_g1 = log_cum_mean1 + 1.96*log_cum_mean1_se
gen log_lb_g1 = log_cum_mean1 - 1.96*log_cum_mean1_se

* The significance stars in the note are typed by hand; they are not computed in this block.
* Only the levels series are plotted here; the log bands above are not used by this graph.
twoway  (rarea ub_g0 lb_g0 mo if mo>=-9 & mo<=9, lwidth(none) fcolor(navy%20)) (rarea ub_g1 lb_g1 mo if mo>=-9 & mo<=9, lwidth(none) fcolor(maroon%20))  ///
(line cum_mean0 mo if mo>=-9 & mo<=9, lcolor(navy)) (line cum_mean1 mo if mo>=-9 & mo<=9, lcolor(maroon) lp(dash)), ylabel(50000(10000)90000) xlabel(-9(3)9) ///
graphregion(color(white)) xtitle("Months Since Graduation", size(medsmall)) ytitle("Cumulative Mean Accepted Offer ($)" " ", size(medsmall))  ///
legend(order(3 "Male" 4 "Female") size(medsmall)) note("Male Slope: `slope_g0'***" "Female Slope: `slope_g1'***", ring(0) pos(2) size(small)) 

* The .gph file is written to the current working directory; the PDF goes to the figures folder.
graph save figure2_levels_2016to2019.gph, replace
graph export "${figures}figureE2B_a.pdf", replace

** PANEL B **

** Residualizing **

* Three nested control sets: (1) basic controls, (2) basic + industry fixed effects,
* (3) basic + industry + job-benefit indicators and expected earnings growth (with missing flag).
global controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
global controls2 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ i.first_industry"
global controls3 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ i.first_industry i.first_benefit_flexwork i.first_benefit_maternity i.first_benefit_paternity i.first_benefit_sickleave i.first_benefit_childcare exp_earn_growth_1yr exp_earn_growth_1yr_miss"

* Remove residuals left over from an earlier run (each pattern is dropped separately
* so that a pattern with no match does not block the others).
forvalues k = 1/3 {
	capture drop *_res`k'
}

set more off
* Residualize levels and logs of accepted earnings on each control set in turn.
forvalues k = 1/3 {
	foreach dep in first_total_nt log_first_total_nt {
		regress `dep' ${controls`k'}
		predict `dep'_res`k', residuals
	}
}

set more off 
capture program drop gap_raw
program define gap_raw, rclass
	* Raw cumulative gender gap (coefficient on male) in first_total_nt and log_first_total_nt
	* among offers accepted by each cutoff month -9..9; results go to cum_gap / log_cum_gap
	* (plus _se) in rows 7..25, where mo equals the cutoff.
	* Returns r(slope_mo) and r(log_slope_mo): slopes of the gap on mo.
	foreach stub in cum_gap log_cum_gap {
		capture drop `stub' `stub'_se
	}
	foreach stub in cum_gap log_cum_gap {
		generate `stub' = .
		generate `stub'_se = .
	}
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	* Levels first, then logs, one regression per cutoff month.
	foreach dep in first_total_nt log_first_total_nt {
		local tgt = subinstr("`dep'", "first_total_nt", "cum_gap", 1)
		forvalues cutoff = -9/9 {
			local row = `cutoff' + 16
			regress `dep' male if accepted==1 & accept_mo<=`cutoff'
			replace `tgt' = _b[male] in `row'
			replace `tgt'_se = _se[male] in `row'
		}
	}

	regress cum_gap mo
	return scalar slope_mo = _b[mo]

	regress log_cum_gap mo
	return scalar log_slope_mo = _b[mo]
end

set more off 
capture program drop gap_res1
program define gap_res1, rclass
	* Cumulative gender gap (coefficient on male) in the _res1 residuals of levels and logs
	* among offers accepted by each cutoff month -9..9; results go to cum_gap_res1 /
	* log_cum_gap_res1 (plus _se) in rows 7..25, where mo equals the cutoff.
	* Returns r(slope_mo_res1) and r(log_slope_mo_res1).
	foreach stub in cum_gap_res1 log_cum_gap_res1 {
		capture drop `stub' `stub'_se
	}
	foreach stub in cum_gap_res1 log_cum_gap_res1 {
		generate `stub' = .
		generate `stub'_se = .
	}
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	* Levels first, then logs, one regression per cutoff month.
	foreach dep in first_total_nt_res1 log_first_total_nt_res1 {
		local tgt = subinstr("`dep'", "first_total_nt", "cum_gap", 1)
		forvalues cutoff = -9/9 {
			local row = `cutoff' + 16
			regress `dep' male if accepted==1 & accept_mo<=`cutoff'
			replace `tgt' = _b[male] in `row'
			replace `tgt'_se = _se[male] in `row'
		}
	}

	regress cum_gap_res1 mo
	return scalar slope_mo_res1 = _b[mo]

	regress log_cum_gap_res1 mo
	return scalar log_slope_mo_res1 = _b[mo]
end

capture program drop gap_res2
program define gap_res2, rclass
	* Cumulative gender gap (coefficient on male) in the _res2 residuals of levels and logs
	* among offers accepted by each cutoff month -9..9; results go to cum_gap_res2 /
	* log_cum_gap_res2 (plus _se) in rows 7..25, where mo equals the cutoff.
	* Returns r(slope_mo_res2) and r(log_slope_mo_res2).
	foreach stub in cum_gap_res2 log_cum_gap_res2 {
		capture drop `stub' `stub'_se
	}
	foreach stub in cum_gap_res2 log_cum_gap_res2 {
		generate `stub' = .
		generate `stub'_se = .
	}
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	* Levels first, then logs, one regression per cutoff month.
	foreach dep in first_total_nt_res2 log_first_total_nt_res2 {
		local tgt = subinstr("`dep'", "first_total_nt", "cum_gap", 1)
		forvalues cutoff = -9/9 {
			local row = `cutoff' + 16
			regress `dep' male if accepted==1 & accept_mo<=`cutoff'
			replace `tgt' = _b[male] in `row'
			replace `tgt'_se = _se[male] in `row'
		}
	}

	regress cum_gap_res2 mo
	return scalar slope_mo_res2 = _b[mo]

	regress log_cum_gap_res2 mo
	return scalar log_slope_mo_res2 = _b[mo]
end

capture program drop gap_res3
program define gap_res3, rclass
	* Cumulative gender gap (coefficient on male) in the _res3 residuals of levels and logs
	* among offers accepted by each cutoff month -9..9; results go to cum_gap_res3 /
	* log_cum_gap_res3 (plus _se) in rows 7..25, where mo equals the cutoff.
	* Returns r(slope_mo_res3) and r(log_slope_mo_res3).
	foreach stub in cum_gap_res3 log_cum_gap_res3 {
		capture drop `stub' `stub'_se
	}
	foreach stub in cum_gap_res3 log_cum_gap_res3 {
		generate `stub' = .
		generate `stub'_se = .
	}
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	* Levels first, then logs, one regression per cutoff month.
	foreach dep in first_total_nt_res3 log_first_total_nt_res3 {
		local tgt = subinstr("`dep'", "first_total_nt", "cum_gap", 1)
		forvalues cutoff = -9/9 {
			local row = `cutoff' + 16
			regress `dep' male if accepted==1 & accept_mo<=`cutoff'
			replace `tgt' = _b[male] in `row'
			replace `tgt'_se = _se[male] in `row'
		}
	}

	regress cum_gap_res3 mo
	return scalar slope_mo_res3 = _b[mo]

	regress log_cum_gap_res3 mo
	return scalar log_slope_mo_res3 = _b[mo]
end

** bootstrap SE **

* Bootstrap the slope of the cumulative gap on months (1000 replications per specification),
* run through the user-written parallel command on 8 clusters.
parallel setclusters 8

set more off
capture log close
set logtype text
* The bootstrap output is captured in a text log next to the figures.
log using "${figures}figureE2B_b_SE", replace

* NOTE(review): the global tempfolder is not defined in the visible part of this file - confirm
* it is set before this point. The working directory stays changed for everything that follows,
* including later relative-path use/save commands.
cd "${tempfolder}"
* One seed per cluster; each program is re-run on every bootstrap sample and its r() slope collected.
parallel bs, exp(slope=r(slope_mo)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_raw
parallel bs, exp(slope=r(slope_mo_res1)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_res1
parallel bs, exp(slope=r(slope_mo_res2)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_res2
parallel bs, exp(slope=r(slope_mo_res3)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_res3

log close

** PLOTTING THE FIGURE **

// getting the coefficients

* Panel B of Figure E2B: cumulative gender gap in earnings, raw and residualized.
set more off
* Each call fills its cum_gap* columns and the mo axis in the data.
gap_raw
gap_res1
gap_res2
gap_res3

* Slopes of each gap series on months, formatted for the figure note.
reg cum_gap mo
local slope: display %4.1f _b[mo]
reg cum_gap_res1 mo
local slope_res1: display %4.1f _b[mo]
reg cum_gap_res2 mo
local slope_res2: display %4.1f _b[mo]
* The res3 slope is computed but not shown in the graph below.
reg cum_gap_res3 mo
local slope_res3: display %4.1f _b[mo]

# delimit ;
graph tw line cum_gap mo if mo>=-9 & mo<=9, msize(small) lpattern(solid) || 
line cum_gap_res1 mo if mo>=-9 & mo<=9, msize(small) lpattern(longdash) ||
line cum_gap_res2 mo if mo>=-9 & mo<=9, msize(small) lpattern(dash_dot) 
ylabel(0(2000)12000, labsize(small)) xlabel(-9(3)9, labsize(small))
xtitle("Months Since Graduation", size(small)) ytitle("Cumulative Gender Gap in Residualized Earnings ($)" "", size(small))
legend(label(1 "No Controls") label(2 "Basic Controls") label(3 "Basic Controls + Industry FE") size(small))
note("Slope (no controls): `slope'" "Slope (w/ basic controls): `slope_res1'" 
"Slope (w/ basic controls + industry FE): `slope_res2'", ring(0) pos(2) size(small))
graphregion(color(white));

graph export "${figures}figureE2B_b.pdf", replace;

#delimit cr
* Fix: the delimiter is switched back to carriage return here. Without it the commands that
* follow are still parsed in semicolon mode, and a star comment would absorb every line up
* to the next semicolon.

****************************************************************************************************************
* Table E1I.A: Gender Gap in Accepted Earnings, Controlling for Risk Preferences and a Proxy for Biased Beliefs
****************************************************************************************************************

* accepted_earnings_add_oc_omit_combined.tex

* Sample for Table E2A: accepted offers, cohorts 2016 and later.
use BU_grad_analysis_sample_aug2021.dta, clear 
keep if cohort>=2016
keep if accepted==1

* oc = percentage gap between expected total pay and realized first-year earnings.
gen oc=(expected_totalpay-first_total_nt)/first_total_nt*100 if !missing(expected_totalpay) & !missing(first_total_nt)
**oc1 winsorizes 100 and above to 100; try alternative winsorizing
* winsor is a user-written command; ocW is not referenced again in the visible code.
winsor oc, gen(ocW) p(0.025)		//winsorizing top and bottom 2.5% 

* Missing industry is assigned code 13 so these observations stay in the industry fixed effects.
replace first_industry=13 if accepted==1 & missing(first_industry)
macro define controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"

* Missing weekly hours are set to the placeholder 999 and flagged with a dummy.
replace offer_weekly_hrs = 999 if offer_weekly_hrs==.
gen offer_weekly_hrs_miss = offer_weekly_hrs==999
* After the recode above no system-missing hours remain, so this line changes nothing.
replace offer_weekly_hrs_miss=. if offer_weekly_hrs==.

** Estimating the earnings regressions in Table A.8 including expected totalpay or perceived ability as a control **

* controls1 is redefined with the same contents as above; controls2 adds industry fixed
* effects and controls3 additionally adds startsearch_mo.
macro define controls0 ""
macro define controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
macro define controls2 "i.first_industry i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ" 
macro define controls3 "i.first_industry i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ startsearch_mo" 

* Variable labels are used as row labels by esttab (option l).
label var expected_totalpay "Expected Total Compensation"
label var trait_confidence "Perceived Relative Ability (1-5)"
label var female "Female"
label var risk2 "Risk Tolerance"

* Table E2A, expected-pay proxy: eight columns, all restricted to non-missing expected_totalpay.
* Columns 1-4 use the basic controls; columns 5-8 add industry, location and weekly-hours controls.
* Within each group the columns add nothing, risk2, expected_totalpay, and both.
est clear
eststo clear
eststo: reg first_total_nt female $controls1 if expected_totalpay~=., robust
estadd ysumm
eststo: reg first_total_nt female risk2 $controls1 if expected_totalpay~=., robust
estadd ysumm
eststo: reg first_total_nt female expected_totalpay $controls1 if expected_totalpay~=., robust
estadd ysumm
eststo: reg first_total_nt female risk2 expected_totalpay $controls1 if expected_totalpay~=., robust
estadd ysumm
eststo: xi: reg first_total_nt female $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss if expected_totalpay~=., robust
estadd ysumm
eststo: xi: reg first_total_nt female risk2 $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss if expected_totalpay~=., robust
estadd ysumm
eststo: xi: reg first_total_nt female expected_totalpay $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss if expected_totalpay~=., robust
estadd ysumm
eststo: xi: reg first_total_nt female risk2 expected_totalpay $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss if expected_totalpay~=., robust
estadd ysumm

* Export all stored columns to LaTeX. The indicate() rows flag the control groups by the
* presence of gpa (basic controls) and of an xi-generated location dummy (additional controls).
# delimit ;
esttab * using "${figures}tableE2A_b.tex", l stats(ymean r2 N, fmt(0 3 0) labels("Mean" "\(R^{2}\)" "N"))  
nomti se b(%4.1f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
booktabs collabels(none) gaps nonotes title("")
substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{16cm}})
nobase addn( "Note: The dependent variable is total accepted earnings in the first year in 2017 dollars. Basic controls include cohort fixed effects, major fixed effects, 
GPA, dummy for US-born, and fixed effects for race, father's education, and mother's education. 
Additional controls include fixed effects for industry (19 groups), dummies for the location of the first job (country/state), and weekly hours of work. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level.") 
keep(female risk2 expected_totalpay) 
indicate("Controls = gpa" "Add. controls = _Ifirst_loc_2", labels("X" ""))
prehead(`"\begin{table}[htbp]\centering"' `"\footnotesize"'
`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
`"\caption{Gender Gap in Accepted Earnings}"'
`"\bigskip"'
`"\begin{tabular}{l*{@M}{c}}"'
`"\toprule"' )
replace;

***************** CHECK IF COEFFICIENTS ARE STATISTICALLY DIFFERENT *************;
		
#delimit cr
	
* Test whether the female coefficient changes once risk2 and expected_totalpay are added.
* Models are re-estimated without the robust option and stored as a1-a4; the robust
* variance is requested at the suest step through vce(robust).
eststo: reg first_total_nt female $controls1 if expected_totalpay~=.
estimates store a1
eststo: reg first_total_nt female risk2 expected_totalpay $controls1 if expected_totalpay~=.
estimates store a2
eststo: xi: reg first_total_nt female $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss if expected_totalpay~=.
estimates store a3
eststo: xi: reg first_total_nt female risk2 expected_totalpay $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss if expected_totalpay~=.
estimates store a4
	
* a1 vs a2: basic controls; a3 vs a4: with the additional controls.
suest a1 a2, vce(robust)
test [a1_mean]female = [a2_mean]female	
suest a3 a4, vce(robust)
test [a3_mean]female = [a4_mean]female	

/*2nd Proxy*/

* Table E2A, second proxy (trait_confidence): eight columns on the full accepted sample.
* Columns 1-4 use the basic controls; columns 5-8 add industry, location and weekly-hours controls.
* Within each group the columns add nothing, risk2, trait_confidence, and both.
est clear
eststo clear
eststo: reg first_total_nt female $controls1, robust
estadd ysumm
eststo: reg first_total_nt female risk2 $controls1, robust
estadd ysumm
eststo: reg first_total_nt female trait_confidence $controls1, robust
estadd ysumm
eststo: reg first_total_nt female trait_confidence risk2 $controls1, robust
estadd ysumm
eststo: xi: reg first_total_nt female $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss, robust
estadd ysumm
eststo: xi: reg first_total_nt female risk2 $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss, robust
estadd ysumm
eststo: xi: reg first_total_nt female trait_confidence $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss, robust
estadd ysumm
eststo: xi: reg first_total_nt female risk2 trait_confidence $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss, robust
estadd ysumm

* Export all stored columns to LaTeX; coefficients are printed without decimals here.
# delimit ;
esttab * using "${figures}tableE2A_a.tex", l stats(ymean r2 N, fmt(0 3 0) labels("Mean" "\(R^{2}\)" "N"))  
nomti se b(%4.0f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
booktabs collabels(none) gaps nonotes title("")
substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{16cm}})
nobase addn( "Note: The dependent variable is total accepted earnings in the first year in 2017 dollars. Basic controls include cohort fixed effects, major fixed effects, 
GPA, dummy for US-born, and fixed effects for race, father's education, and mother's education. 
Additional controls include fixed effects for industry (19 groups), dummies for the location of the first job (country/state), and weekly hours of work. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level.") 
keep(female risk2 trait_confidence) 
indicate("Controls = gpa" "Add. controls = _Ifirst_loc_2", labels("X" ""))
prehead(`"\begin{table}[htbp]\centering"' `"\footnotesize"'
`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
`"\caption{Gender Gap in Accepted Earnings}"'
`"\bigskip"'
`"\begin{tabular}{l*{@M}{c}}"'
`"\toprule"' )
replace;

#delimit cr
	
* Test whether the female coefficient changes once trait_confidence and risk2 are added.
* Models are re-estimated without the robust option and stored as b1-b4; the robust
* variance is requested at the suest step through vce(robust).
eststo: reg first_total_nt female $controls1
estimates store b1
eststo: reg first_total_nt female trait_confidence risk2 $controls1
estimates store b2
eststo: xi: reg first_total_nt female $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss
estimates store b3
eststo: xi: reg first_total_nt female risk2 trait_confidence $controls1 i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss
estimates store b4
	
* b1 vs b2: basic controls; b3 vs b4: with the additional controls.
suest b1 b2, vce(robust)
test [b1_mean]female = [b2_mean]female	
suest b3 b4, vce(robust)
test [b3_mean]female = [b4_mean]female	

// E.III Winsorizing vs. Omitting Outliers //

*****************************************************************************************************
* Figure EIII.A: Cumulative Mean Accepted Earnings and Gender Gap by Months Since Graduation 
*****************************************************************************************************

* figure2_levelsW.pdf
* fact2_gap_levelsW.pdf

* Start from the june2022 sample and rebuild the derived variables used below.
use BU_grad_analysis_sample_june2022, clear

* Diagnostic: row sum of the race dummies (including the missing-race flag).
capture drop race_check
egen race_check = rsum(race_white race_black race_am_india race_latino race_asia_pacif miss_race)
tab race_check
tab race_check if miss_race==1

* The check is rebuilt and tabulated a second time with the same inputs.
cap drop race_check
egen race_check = rsum(race_white race_black race_am_india race_latino race_asia_pacif miss_race)
tab race_check	
	
* Drop any existing versions of the variables that are regenerated below.
capture drop fa_educ mo_educ 
capture drop trait_risk_daily2 trait_risk_finance2 risk2 
capture drop accept_*after_*

* Code 9 marks a blank parental-education response in the BA indicators.
replace fa_ba=9 if fa_edu==""
replace mo_ba=9 if mo_edu==""

* Map the free-text parental education strings into numeric categories 1-8.
* Comparisons are exact string matches (including case and trailing blanks); any string not
* listed, and blank responses, end up in category 99.
gen fa_educ = .
replace fa_educ = 1 if fa_edu == "Less than HS" | fa_edu == "Did not finish high school" | fa_edu == "MIDDLE SCHOOL" | fa_edu == "My Father Has No Degrees" | fa_edu == "no degree" | fa_edu == "middle school" | fa_edu == "equivalent of middle school in ecuador " | fa_edu == "He only completed up to middle school " | fa_edu == "Did not finish high school "
replace fa_educ = 2 if fa_edu == "High School Degree"
replace fa_educ = 3 if fa_edu == "Some College/Associate Degree" | fa_edu == "Trade School" | fa_edu=="CPA"
replace fa_educ = 4 if fa_edu == "Bachelor (B.A., B.S)"
replace fa_educ = 5 if fa_edu == "Medicine (M.D.)" | fa_edu == "DDS" | fa_edu == "B.S. and Pharm. D " | fa_edu == "DMD"
replace fa_educ = 6 if fa_edu == "Masters (M.A., M.S., M.F.A.)" | fa_edu == "MBA" | fa_edu == "JD and an MBA"
replace fa_educ = 7 if fa_edu == "Law (J.D.)"
replace fa_educ = 8 if fa_edu == "Doctorate (Ph.D)" 
*replace fa_educ = 9 if fa_educ ==. 					// changed in dec 2020
replace fa_educ = 99 if fa_edu == "" | fa_educ==.		// changed in dec 2020

* Same mapping for the mother; the list of matched strings differs slightly from the father's.
gen mo_educ = .
replace mo_educ = 1 if mo_edu == "Less than HS" | mo_edu == "Did not finish high school" | mo_edu == "MIDDLE SCHOOL" | mo_edu == "My mother Has No Degrees" | mo_edu == "no degree" | mo_edu == "middle school" | mo_edu == "equivalent of middle school in ecuador " | mo_edu == "He only completed up to middle school " | mo_edu == "Did not finish high school " | mo_edu=="Didn't complete high school" | mo_edu == "GED"
replace mo_educ = 2 if mo_edu == "High School Degree"
replace mo_educ = 3 if mo_edu == "Some College/Associate Degree" | mo_edu == "Trade School" | mo_edu=="CPA"
replace mo_educ = 4 if mo_edu == "Bachelor (B.A., B.S)"
replace mo_educ = 5 if mo_edu == "Medicine (M.D.)" | mo_edu == "Doctor of Veterinary Medicine" | mo_edu == "DDS" | mo_edu == "B.S. and Pharm. D "
replace mo_educ = 6 if mo_edu == "Masters (M.A., M.S., M.F.A.)" | mo_edu == "MBA" | mo_edu == "JD and an MBA"
replace mo_educ = 7 if mo_edu == "Law (J.D.)"
replace mo_educ = 8 if mo_edu == "Doctorate (Ph.D)"
*replace mo_educ = 9 if mo_educ ==. 
*replace mo_educ = . if mo_edu == ""
replace mo_educ = 99 if mo_edu == "" | mo_educ==.		// changed in dec 2020

* Location of the first job: the state for US jobs, otherwise the country.
capture drop first_location
gen first_location = first_state if first_country== "United States of America (USA)" | first_country== "United States"
* Falls back to the country when no state was filled in (this includes US jobs with a blank state).
replace first_location = first_country if first_location==""
* Harmonize the two spellings of the US country name.
replace first_location = "United States of America (USA)" if first_location=="United States"
replace first_location = "Not Specified" if first_location==""

* US-job indicator; missing when the country is blank.
capture drop first_job_us
gen first_job_us = first_country== "United States of America (USA)" | first_country== "United States"
replace first_job_us=. if first_country==""

* Coarser version: all non-US jobs pooled into a single "Not USA" category.
capture drop first_location_v2
gen first_location_v2 = "Not USA" if first_job_us==0
replace first_location_v2 = first_location if first_job_us==1
replace first_location_v2 = "Not Specified" if first_location_v2==""

* Risk items: merge code 1 into code 2, then shift every code down by one.
foreach item in daily finance {
	recode trait_risk_`item' (1=2), generate(trait_risk_`item'2)
	replace trait_risk_`item'2 = trait_risk_`item'2 - 1
}

* risk2 = row mean of the two recoded items, blanked whenever the daily item is missing.
egen risk2 = rmean(trait_risk_daily2 trait_risk_finance2)
replace risk2 = . if trait_risk_daily2==.
summarize risk2

* Acceptance-timing dummies, left missing when the acceptance month is missing:
* strictly after graduation, and at least 3 / 6 / 9 months after graduation.
generate accept_after_grad = (accept_mo>0) if accept_mo~=.
foreach k of numlist 3 6 9 {
	generate accept_`k'mafter_grad = (accept_mo>=`k') if accept_mo~=.
}

*removing self-employed from accepted sample*

* Observations with jobsearch_3==2 are not counted as accepted offers
* (per the section header, these are the self-employed).
tab jobsearch_3 if accepted==1
replace accepted=0 if jobsearch_3==2
assert accepted==0 if jobsearch_3==2

keep if accepted==1

* winsorizing earnings variable instead of dropping outliers

* Earnings are clamped to the range 20,000 - 175,000; missing earnings stay missing.
gen first_total_ntW = first_total_nt
replace first_total_ntW = 20000 if first_total_nt<20000 & first_total_nt~=.
replace first_total_ntW = 175000 if first_total_nt>175000 & first_total_nt~=.

gen log_first_total_ntW = ln(first_total_ntW)

* Written to the current working directory and reloaded by the sections below.
save temp_windsor, replace

**

* Analysis sample for Figure E3A: winsorized file, accepted offers,
* acceptance within 15 months of graduation (either side).
use temp_windsor, clear
keep if accepted==1
keep if abs(accept_mo)<=15
	
gen male = 1-female
* Missing industry is assigned code 13 so these observations stay in the industry fixed effects.
replace first_industry=13 if accepted==1 & missing(first_industry)

* Indicator for maternity OR paternity leave, defined only when BOTH benefit items are observed.
* Fix: the original qualifier tested first_benefit_maternity twice and never checked paternity.
gen first_benefit_matorpat= (first_benefit_maternity==1 | first_benefit_paternity==1) if !missing(first_benefit_maternity) & !missing(first_benefit_paternity)
	
* Missing-indicator approach: flag missing expected earnings growth, then set it to 0.
foreach var of varlist exp_earn_growth_1yr{
	gen `var'_miss=missing(`var')
	replace `var'=0 if missing(`var')
	}

capture program drop fig2_levels
program define fig2_levels, rclass
	* Winsorized version: cumulative mean of first_total_ntW by gender for each cutoff
	* month -9..9. Writes cum_mean0/cum_mean1 (female==0/1) and their standard errors
	* into rows 7..25, where the helper variable mo equals the cutoff month.
	* Returns r(slope_g0) and r(slope_g1): slopes of the cumulative means on mo.
	foreach out in cum_mean0 cum_mean1 cum_mean0_se cum_mean1_se {
		capture drop `out'
		generate `out' = .
	}

	* Row index recentred so that row 16 corresponds to month 0.
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	forvalues cutoff = -9/9 {
		local row = `cutoff' + 16
		forvalues g = 0/1 {
			* Intercept of a constant-only regression = mean among offers accepted by the cutoff.
			regress first_total_ntW if accepted==1 & accept_mo<=`cutoff' & female==`g'
			replace cum_mean`g' = _b[_cons] in `row'
			replace cum_mean`g'_se = _se[_cons] in `row'
		}
	}

	forvalues g = 0/1 {
		regress cum_mean`g' mo
		return scalar slope_g`g' = _b[mo]
	}
end

** PLOTTING THE FIGURE **

* Panel A of Figure E3A: cumulative mean winsorized earnings by gender with 95% bands.
set more off 

// getting the coefficients
fig2_levels
* fig2_logs is not redefined in this section, so the call relies on the definition made
* earlier in the file; its log_cum_mean* outputs are not used by the graph below.
fig2_logs

* Re-run the slope regressions to format the slopes for the figure note.
reg cum_mean0 mo
local slope_g0: display %4.1f _b[mo]
reg cum_mean1 mo
local slope_g1: display %4.1f _b[mo]

* Pointwise bands: estimate +/- 1.96 standard errors.
cap drop ub_* lb_* log_ub_* log_lb_*
gen ub_g0 = cum_mean0 + 1.96*cum_mean0_se
gen lb_g0 = cum_mean0 - 1.96*cum_mean0_se

gen ub_g1 = cum_mean1 + 1.96*cum_mean1_se
gen lb_g1 = cum_mean1 - 1.96*cum_mean1_se

* The significance stars in the note are typed by hand; they are not computed in this block.
twoway  (rarea ub_g0 lb_g0 mo if mo>=-9 & mo<=9, lwidth(none) fcolor(navy%20)) (rarea ub_g1 lb_g1 mo if mo>=-9 & mo<=9, lwidth(none) fcolor(maroon%20))  ///
(line cum_mean0 mo if mo>=-9 & mo<=9, lcolor(navy)) (line cum_mean1 mo if mo>=-9 & mo<=9, lcolor(maroon) lp(dash)), ylabel(50000(10000)90000) xlabel(-9(3)9) ///
graphregion(color(white)) xtitle("Months Since Graduation", size(medsmall)) ytitle("Cumulative Mean Accepted Offer ($)" " ", size(medsmall))  ///
legend(order(3 "Male" 4 "Female") size(medsmall)) name(figure2, replace) note("Male Slope: `slope_g0'***" "Female Slope: `slope_g1'***", ring(0) pos(2) size(small)) 

graph export "${figures}figureE3A_a.pdf", replace

** Residualizing **

* Three nested control sets: (1) basic controls, (2) basic + industry fixed effects,
* (3) basic + industry + job-benefit indicators and expected earnings growth (with missing flag).
macro define controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
macro define controls2 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ i.first_industry"
macro define controls3 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ i.first_industry i.first_benefit_flexwork i.first_benefit_maternity i.first_benefit_paternity i.first_benefit_sickleave i.first_benefit_childcare exp_earn_growth_1yr exp_earn_growth_1yr_miss"

* controls *

* Remove residuals left over from an earlier run. The residuals created below end in W
* (first_total_nt_res1W, ...), which the original *_res1/2/3 patterns do not match, so the
* W-suffixed patterns are dropped as well; otherwise re-running this section in the same
* session stops at predict because the variables already exist.
cap drop *_res1 
cap drop *_res2 
cap drop *_res3
cap drop *_res1W
cap drop *_res2W
cap drop *_res3W

set more off
* Residualize winsorized earnings on each control set in turn.
reg first_total_ntW $controls1
predict first_total_nt_res1W, res

reg first_total_ntW $controls2
predict first_total_nt_res2W, res

reg first_total_ntW $controls3
predict first_total_nt_res3W, res

set more off 
capture program drop gap_raw
program define gap_raw, rclass
	* Winsorized version: raw cumulative gender gap (coefficient on male) in first_total_ntW
	* among offers accepted by each cutoff month -9..9, stored in cum_gap / cum_gap_se
	* in rows 7..25, where mo equals the cutoff. Returns r(slope_mo) only.
	capture drop cum_gap cum_gap_se
	generate cum_gap = .
	generate cum_gap_se = .
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	* The row counter starts just before row 7, which holds cutoff -9.
	local row = 6
	forvalues cutoff = -9/9 {
		local ++row
		regress first_total_ntW male if accepted==1 & accept_mo<=`cutoff'
		replace cum_gap = _b[male] in `row'
		replace cum_gap_se = _se[male] in `row'
	}

	regress cum_gap mo
	return scalar slope_mo = _b[mo]
end

set more off 
capture program drop gap_res1
program define gap_res1, rclass
	* Winsorized version: cumulative gender gap (coefficient on male) in first_total_nt_res1W
	* among offers accepted by each cutoff month -9..9, stored in cum_gap_res1 /
	* cum_gap_res1_se in rows 7..25. Returns r(slope_mo_res1) only.
	capture drop cum_gap_res1 cum_gap_res1_se
	generate cum_gap_res1 = .
	generate cum_gap_res1_se = .
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	* The row counter starts just before row 7, which holds cutoff -9.
	local row = 6
	forvalues cutoff = -9/9 {
		local ++row
		regress first_total_nt_res1W male if accepted==1 & accept_mo<=`cutoff'
		replace cum_gap_res1 = _b[male] in `row'
		replace cum_gap_res1_se = _se[male] in `row'
	}

	regress cum_gap_res1 mo
	return scalar slope_mo_res1 = _b[mo]
end

capture program drop gap_res2
program define gap_res2, rclass
	* Winsorized version: cumulative gender gap (coefficient on male) in first_total_nt_res2W
	* among offers accepted by each cutoff month -9..9, stored in cum_gap_res2 /
	* cum_gap_res2_se in rows 7..25. Returns r(slope_mo_res2) only.
	capture drop cum_gap_res2 cum_gap_res2_se
	generate cum_gap_res2 = .
	generate cum_gap_res2_se = .
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	* The row counter starts just before row 7, which holds cutoff -9.
	local row = 6
	forvalues cutoff = -9/9 {
		local ++row
		regress first_total_nt_res2W male if accepted==1 & accept_mo<=`cutoff'
		replace cum_gap_res2 = _b[male] in `row'
		replace cum_gap_res2_se = _se[male] in `row'
	}

	regress cum_gap_res2 mo
	return scalar slope_mo_res2 = _b[mo]
end

capture program drop gap_res3
program define gap_res3, rclass
	* Winsorized version: cumulative gender gap (coefficient on male) in first_total_nt_res3W
	* among offers accepted by each cutoff month -9..9, stored in cum_gap_res3 /
	* cum_gap_res3_se in rows 7..25. Returns r(slope_mo_res3) only.
	capture drop cum_gap_res3 cum_gap_res3_se
	generate cum_gap_res3 = .
	generate cum_gap_res3_se = .
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	* The row counter starts just before row 7, which holds cutoff -9.
	local row = 6
	forvalues cutoff = -9/9 {
		local ++row
		regress first_total_nt_res3W male if accepted==1 & accept_mo<=`cutoff'
		replace cum_gap_res3 = _b[male] in `row'
		replace cum_gap_res3_se = _se[male] in `row'
	}

	regress cum_gap_res3 mo
	return scalar slope_mo_res3 = _b[mo]
end

** PLOTTING THE FIGURE **

// getting the coefficients

* Panel B of Figure E3A: cumulative gender gap in winsorized earnings, raw and residualized.
set more off
* Each call fills its cum_gap* columns and the mo axis in the data.
gap_raw
gap_res1
gap_res2
gap_res3

* Slopes of each gap series on months, formatted for the figure note.
reg cum_gap mo
local slope: display %4.1f _b[mo]
reg cum_gap_res1 mo
local slope_res1: display %4.1f _b[mo]
reg cum_gap_res2 mo
local slope_res2: display %4.1f _b[mo]
* The res3 slope is computed but not shown in the graph below.
reg cum_gap_res3 mo
local slope_res3: display %4.1f _b[mo]

* The significance stars in the note are typed by hand; they are not computed in this block.
# delimit ;
graph tw line cum_gap mo if mo>=-9 & mo<=9, msize(small) lpattern(solid) || 
line cum_gap_res1 mo if mo>=-9 & mo<=9, msize(small) lpattern(longdash) ||
line cum_gap_res2 mo if mo>=-9 & mo<=9, msize(small) lpattern(dash_dot) 
ylabel(0(5000)15000, labsize(small)) xlabel(-9(3)9, labsize(small))
xtitle("Months Since Graduation", size(small)) ytitle("Cumulative Gender Gap in Residualized Earnings ($)" "", size(small))
legend(label(1 "No Controls") label(2 "Basic Controls") label(3 "Basic Controls + Industry FE") size(small))
note("Slope (no controls): `slope'" "Slope (w/ basic controls): `slope_res1'*" 
"Slope (w/ basic controls + industry FE): `slope_res2'*", ring(0) pos(2) size(small))
graphregion(color(white));

graph export "${figures}figureE3A_b.pdf", replace;

#delimit cr
* Fix: the delimiter is switched back to carriage return here. Without it the commands that
* follow are still parsed in semicolon mode, and a star comment would absorb every line up
* to the next semicolon.

** bootstrap SE **

* Bootstrap the slopes for Figure E3A (1000 replications per specification),
* run through the user-written parallel command on 8 clusters.
parallel setclusters 8

set more off
capture log close
set logtype text
* The bootstrap output is captured in a text log next to the figures.
log using "${figures}figureE3A_b_SE", replace

* NOTE(review): the global tempfolder is not defined in the visible part of this file - confirm
* it is set before this point. The working directory stays changed for everything that follows.
cd "${tempfolder}"
parallel bs, exp(slope=r(slope_mo)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_raw
parallel bs, exp(slope=r(slope_mo_res1)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_res1
parallel bs, exp(slope=r(slope_mo_res2)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_res2
parallel bs, exp(slope=r(slope_mo_res3)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_res3

parallel bs, exp(slope=r(slope_g0)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): fig2_levels
parallel bs, exp(slope=r(slope_g1)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): fig2_levels

** in logs **
* NOTE(review): the gap_raw / gap_res1-3 programs as redefined in this section return only the
* level slopes, so the log scalars requested below are not set by them. The runs are kept but
* wrapped in capture noisily so that any error is printed to the log and the remaining
* bootstraps and the log close are still reached. Confirm whether log versions of the
* winsorized gap programs were intended.
capture noisily parallel bs, exp(slope=r(log_slope_mo)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_raw
capture noisily parallel bs, exp(slope=r(log_slope_mo_res1)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_res1
capture noisily parallel bs, exp(slope=r(log_slope_mo_res2)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_res2
capture noisily parallel bs, exp(slope=r(log_slope_mo_res3)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_res3

* Fix: the log slopes by gender are returned by fig2_logs, not fig2_levels.
* NOTE(review): fig2_logs is the definition from earlier in the file and uses
* log_first_total_nt rather than the winsorized log_first_total_ntW - confirm this is intended.
parallel bs, exp(slope=r(log_slope_g0)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): fig2_logs
parallel bs, exp(slope=r(log_slope_g1)) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): fig2_logs

log close


******************************************************************************************************************************
* Table EIII.A: Gender Gap in Accepted Earnings (Winsorized), Controlling for Risk Preferences and a Proxy for Biased Beliefs
******************************************************************************************************************************

* accepted_earnings_add_oc_winsor_combined.tex

* Data preparation for both panels of the table: load the winsorized sample,
* build missing-value flags, label the variables and define the basic controls.

use temp_windsor, clear

	* Accepted offers with no recorded industry are assigned to industry code 13.
	replace first_industry=13 if accepted==1 & missing(first_industry)

	* Parental-leave indicator, defined only when BOTH the maternity and the
	* paternity benefit variables are observed.
	gen first_benefit_matorpat= (first_benefit_maternity==1 | first_benefit_paternity==1) if !missing(first_benefit_maternity) & !missing(first_benefit_paternity)

	* Flag missing values, then set them to 0 so the observations stay in the sample.
	foreach var of varlist exp_earn_growth_1yr{
		gen `var'_miss=missing(`var')
		replace `var'=0 if missing(`var')
	}
	
	gen debt_1000=student_debt/1000 if !missing(student_debt)
	
	* Same flag-and-zero treatment for the remaining covariates (flag prefix: miss_).
	foreach var of varlist debt_1000 startsearch_mo oc1 confidence_measure patience_3 procrastindex_3 over_conf{
		gen miss_`var'=missing(`var')
		replace `var'=0 if missing(`var')
	}
	
	* Variable labels (each variable is labelled once, with the label used in the tables).
	label var female "Female"
	label var risk2 "Risk Tolerance"
	label var high_risk2 "Risk Tol. $\geq$ 5"
	label var over_conf "Overconfident (0/1)"
	label var oc1 "Exp-Real (pp)"
	label var confidence_measure "Overconfidence (Resid)"
	label var exp_earn_growth_1yr "12mo Exp. Earn Grow"
	label var startsearch_mo "Mo. Start Search"
	label var procrastindex_3 "Procrast"
	label var patience_3 "Patience"
	label var debt_1000 "Debt (1,000)"
	
	label var first_benefit_maternity "Maternity Leave"
	label var first_benefit_paternity "Paternity Leave" 
	label var first_benefit_flexwork "Flexible Hours"
	label var first_benefit_sickleave "Sick Leave"
	label var first_benefit_childcare "Childcare"
	label var first_benefit_matorpat "Parental Leave"
	label var offer_weekly_hrs "Weekly Hours"
	label var first_hours "Hours per Week"
	
	* Round risk tolerance down to an integer and build text labels for it.
	* NOTE(review): the step of 2 labels only the values 1, 3 and 5; confirm
	* whether 1/6 was intended. risk_tol_lab is only used by the commented-out
	* labmask call below.
	gen risk_tol_lab=""
	replace risk2=floor(risk2)
	forval i=1(2)6{
		replace risk_tol_lab="Risk Tol. = `i'" if risk2==`i'
	}
	
	* Missing weekly hours are recoded to the sentinel 999 and flagged by a dummy,
	* so the regressions below keep these observations.
	replace offer_weekly_hrs = 999 if offer_weekly_hrs==.
	gen offer_weekly_hrs_miss = offer_weekly_hrs==999
	
	*labmask risk2, values(risk_tol_lab)	
	
	set more off
	* Basic control set shared by every regression in this table.
	macro define controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"

* Labels shown in the exported tables.
label var expected_totalpay "Expected Total Compensation"
label var trait_confidence "Perceived Relative Ability (1-5)"
label var female "Female"
label var risk2 "Risk Tolerance"

* Start from an empty set of stored estimates.
est clear
eststo clear

* Shared pieces of the eight specifications:
*   e3a_if      - common sample restriction (expected_totalpay observed)
*   e3a_jobctrl - additional job controls used in columns 5-8
*   e3a_rhs1-4  - regressors added next to female in each column
local e3a_if      "if expected_totalpay~=."
local e3a_jobctrl "i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss"
local e3a_rhs1    ""
local e3a_rhs2    "risk2"
local e3a_rhs3    "expected_totalpay"
local e3a_rhs4    "risk2 expected_totalpay"

* Columns 1-4: basic controls only.
forvalues k = 1/4 {
	eststo: reg first_total_ntW female `e3a_rhs`k'' $controls1 `e3a_if', robust
	estadd ysumm
}

* Columns 5-8: basic controls plus the job controls (xi expands the i. terms).
forvalues k = 1/4 {
	eststo: xi: reg first_total_ntW female `e3a_rhs`k'' $controls1 `e3a_jobctrl' `e3a_if', robust
	estadd ysumm
}

# delimit ;
/* Export all stored models to tableE3A_b.tex in the figures folder.
   Only the female, risk2 and expected_totalpay coefficients are printed.
   The indicate() rows mark with an X the columns that contain gpa (Controls)
   and _Ifirst_loc_2 (Add. controls). The footer reports the mean of the
   dependent variable, R-squared and N. The command spans several lines and
   is terminated by the semicolon after replace. */
esttab * using "${figures}tableE3A_b.tex", l stats(ymean r2 N, fmt(0 3 0) labels("Mean" "\(R^{2}\)" "N"))  
nomti se b(%4.1f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
booktabs collabels(none) gaps nonotes title("")
substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{16cm}})
nobase addn( "Note: The dependent variable is total accepted earnings (winsorized) in the first year in 2017 dollars. For this outcome, instead of dropping outliers (individuals who earn below \textdollar 20,000 and above \textdollar 175,000), we winsorize earnings above \textdollar 175,000 and below \textdollar 20,000. Basic controls include cohort fixed effects, major fixed effects, 
GPA, dummy for US-born, and fixed effects for race, father's education, and mother's education. 
Additional controls include fixed effects for industry (19 groups), dummies for the location of the first job (country/state), and weekly hours of work. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level.") 
keep(female risk2 expected_totalpay) 
indicate("Controls = gpa" "Add. controls = _Ifirst_loc_2", labels("X" ""))
prehead(`"\begin{table}[htbp]\centering"' `"\footnotesize"'
`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
`"\caption{Gender Gap in Accepted Earnings}"'
`"\bigskip"'
`"\begin{tabular}{l*{@M}{c}}"'
`"\toprule"' )
replace;

***************** CHECK IF COEFFICIENTS ARE STATISTICALLY DIFFERENT *************;
		
#delimit cr
	
* The models are re-fit without the robust option and stored as a1-a4;
* suest then stacks each pair with vce(robust) and the female coefficients
* are compared across the two models of the pair.
*   a1 vs a2: basic controls, without vs with risk2 and expected_totalpay
*   a3 vs a4: same comparison with the additional job controls
local e3a_if      "if expected_totalpay~=."
local e3a_proxy   "risk2 expected_totalpay"
local e3a_jobctrl "i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss"

eststo: reg first_total_ntW female $controls1 `e3a_if'
estimates store a1
eststo: reg first_total_ntW female `e3a_proxy' $controls1 `e3a_if'
estimates store a2
eststo: xi: reg first_total_ntW female $controls1 `e3a_jobctrl' `e3a_if'
estimates store a3
eststo: xi: reg first_total_ntW female `e3a_proxy' $controls1 `e3a_jobctrl' `e3a_if'
estimates store a4
	
* Test equality of the female coefficient within each pair of models.
foreach pair in "a1 a2" "a3 a4" {
	local m0 : word 1 of `pair'
	local m1 : word 2 of `pair'
	suest `m0' `m1', vce(robust)
	test [`m0'_mean]female = [`m1'_mean]female
}

/* Second proxy: trait_confidence replaces expected_totalpay, and the sample
   is no longer restricted to observations with expected_totalpay. */

eststo clear

* Shared pieces of the specifications. Columns 4 and 8 are written out
* explicitly because they list risk2 and trait_confidence in different orders.
local e3a_jobctrl "i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss"
local e3a_rhs1    ""
local e3a_rhs2    "risk2"
local e3a_rhs3    "trait_confidence"

* Columns 1-3: basic controls only.
forvalues k = 1/3 {
	eststo: reg first_total_ntW female `e3a_rhs`k'' $controls1, robust
	estadd ysumm
}
* Column 4: both proxies, basic controls.
eststo: reg first_total_ntW female trait_confidence risk2 $controls1, robust
estadd ysumm

* Columns 5-7: basic controls plus the job controls.
forvalues k = 1/3 {
	eststo: xi: reg first_total_ntW female `e3a_rhs`k'' $controls1 `e3a_jobctrl', robust
	estadd ysumm
}
* Column 8: both proxies, basic plus job controls.
eststo: xi: reg first_total_ntW female risk2 trait_confidence $controls1 `e3a_jobctrl', robust
estadd ysumm

# delimit ;
/* Export all stored models to tableE3A_a.tex in the figures folder.
   Only the female, risk2 and trait_confidence coefficients are printed.
   The indicate() rows mark with an X the columns that contain gpa (Controls)
   and _Ifirst_loc_2 (Add. controls). The footer reports the mean of the
   dependent variable, R-squared and N. The command spans several lines and
   is terminated by the semicolon after replace. */
esttab * using "${figures}tableE3A_a.tex", l stats(ymean r2 N, fmt(0 3 0) labels("Mean" "\(R^{2}\)" "N"))  
nomti se b(%4.0f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
booktabs collabels(none) gaps nonotes title("")
substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{16cm}})
nobase addn( "Note: The dependent variable is total accepted earnings (winsorized) in the first year in 2017 dollars. For this outcome, instead of dropping outliers (individuals who earn below \textdollar 20,000 and above \textdollar 175,000), we winsorize earnings above \textdollar 175,000 and below \textdollar 20,000. Basic controls include cohort fixed effects, major fixed effects, 
GPA, dummy for US-born, and fixed effects for race, father's education, and mother's education. 
Additional controls include fixed effects for industry (19 groups), dummies for the location of the first job (country/state), and weekly hours of work. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level.") 
keep(female risk2 trait_confidence) 
indicate("Controls = gpa" "Add. controls = _Ifirst_loc_2", labels("X" ""))
prehead(`"\begin{table}[htbp]\centering"' `"\footnotesize"'
`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
`"\caption{Gender Gap in Accepted Earnings}"'
`"\bigskip"'
`"\begin{tabular}{l*{@M}{c}}"'
`"\toprule"' )
replace;

#delimit cr
	
* The models are re-fit without the robust option and stored as b1-b4;
* suest then stacks each pair with vce(robust) and the female coefficients
* are compared across the two models of the pair.
*   b1 vs b2: basic controls, without vs with trait_confidence and risk2
*   b3 vs b4: same comparison with the additional job controls
local e3a_jobctrl "i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss"

eststo: reg first_total_ntW female $controls1
estimates store b1
eststo: reg first_total_ntW female trait_confidence risk2 $controls1
estimates store b2
eststo: xi: reg first_total_ntW female $controls1 `e3a_jobctrl'
estimates store b3
eststo: xi: reg first_total_ntW female risk2 trait_confidence $controls1 `e3a_jobctrl'
estimates store b4
	
* Test equality of the female coefficient within each pair of models.
foreach pair in "b1 b2" "b3 b4" {
	local m0 : word 1 of `pair'
	local m1 : word 2 of `pair'
	suest `m0' `m1', vce(robust)
	test [`m0'_mean]female = [`m1'_mean]female
}



